package scraper

import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"

	"github.com/yourorg/devour/pkg/nuxtdocs"
)

// NuxtDocsScraper fetches a Nuxt documentation reference page over HTTP,
// parses it with nuxtdocs.Parser, and converts the parsed reference and each
// of its entries into Documents.
type NuxtDocsScraper struct {
	config *Config
	parser *nuxtdocs.Parser
	client *http.Client
}

// NewNuxtDocsScraper returns a scraper whose HTTP client uses config.Timeout.
// config must be non-nil.
func NewNuxtDocsScraper(config *Config) *NuxtDocsScraper {
	return &NuxtDocsScraper{
		config: config,
		parser: nuxtdocs.NewParser(),
		client: &http.Client{
			Timeout: config.Timeout,
		},
	}
}

// requireURL reports an error when source is nil or carries no URL, so that
// Scrape and DetectChanges reject unusable sources in the same way.
func (s *NuxtDocsScraper) requireURL(source *Source) error {
	if source == nil || source.URL == "" {
		return fmt.Errorf("URL is required for Nuxt docs scraper")
	}
	return nil
}

// Scrape downloads source.URL, parses it as a Nuxt reference page, and returns
// one summary Document for the reference followed by one Document per
// component, composable, utility, config, and command, in that order.
//
// It returns an error if the source has no URL, the page cannot be fetched,
// or the page cannot be parsed.
func (s *NuxtDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if err := s.requireURL(source); err != nil {
		return nil, err
	}

	html, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	ref, err := s.parser.ParseReferencePage(html, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse reference: %w", err)
	}

	// The final size is known up front: the summary plus one per entry.
	total := 1 + len(ref.Components) + len(ref.Composables) +
		len(ref.Utilities) + len(ref.Configs) + len(ref.Commands)
	documents := make([]*Document, 0, total)

	documents = append(documents, s.referenceToDocument(ref, source.Name))
	for _, comp := range ref.Components {
		documents = append(documents, s.componentToDocument(comp, source.Name))
	}
	for _, comp := range ref.Composables {
		documents = append(documents, s.composableToDocument(comp, source.Name))
	}
	for _, util := range ref.Utilities {
		documents = append(documents, s.utilityToDocument(util, source.Name))
	}
	for _, cfg := range ref.Configs {
		documents = append(documents, s.configToDocument(cfg, source.Name))
	}
	for _, cmd := range ref.Commands {
		documents = append(documents, s.commandToDocument(cmd, source.Name))
	}

	return documents, nil
}

// DetectChanges downloads source.URL and compares the SHA-256 of the raw page
// body against lastHash. It returns whether the hashes differ together with
// the newly computed hash.
//
// It returns an error if the source has no URL or the page cannot be fetched.
func (s *NuxtDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	if err := s.requireURL(source); err != nil {
		return false, "", err
	}

	html, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", err
	}

	hash := s.generateHash(html)
	return hash != lastHash, hash, nil
}

// fetchPage issues a GET request for url and returns the response body as a
// string. Any status other than 200 OK is reported as an error, as is a body
// larger than the internal size cap.
func (s *NuxtDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	// Upper bound on how much of a response is buffered in memory.
	const maxBodyBytes = 64 << 20

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	// An explicitly empty User-Agent value makes net/http omit the header
	// entirely, so only override it when one is configured.
	if ua := s.config.UserAgent; ua != "" {
		req.Header.Set("User-Agent", ua)
	}

	resp, err := s.client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", resp.StatusCode)
	}

	// Read one byte past the cap so an oversized body is detected rather
	// than silently truncated.
	body, err := io.ReadAll(io.LimitReader(resp.Body, maxBodyBytes+1))
	if err != nil {
		return "", err
	}
	if len(body) > maxBodyBytes {
		return "", fmt.Errorf("response body exceeds %d bytes", maxBodyBytes)
	}

	return string(body), nil
}

// generateHash returns the hex-encoded SHA-256 digest of content.
func (s *NuxtDocsScraper) generateHash(content string) string {
	sum := sha256.Sum256([]byte(content))
	return hex.EncodeToString(sum[:])
}

// referenceToDocument builds the summary Document for a parsed reference: a
// heading plus the number of entries in each category. The Document's Hash is
// computed over that generated content and its Timestamp is the current time.
func (s *NuxtDocsScraper) referenceToDocument(ref *nuxtdocs.Reference, sourceName string) *Document {
	var content strings.Builder
	content.WriteString("# Nuxt API Reference\n\n")
	fmt.Fprintf(&content, "Components: %d, Composables: %d, Utilities: %d, Configs: %d, Commands: %d\n",
		len(ref.Components), len(ref.Composables), len(ref.Utilities), len(ref.Configs), len(ref.Commands))

	body := content.String()
	return &Document{
		ID:        generateDocID(ref.DocURL),
		Source:    sourceName,
		Type:      "nuxt-reference",
		Title:     "Nuxt API Reference",
		Content:   body,
		URL:       ref.DocURL,
		Metadata:  map[string]interface{}{"doc_type": "nuxt-reference"},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}

// componentToDocument renders a component as markdown (a heading with the
// component tag, its doc text, and a Props list when it has props) and wraps
// it in a Document of type "nuxt-component".
func (s *NuxtDocsScraper) componentToDocument(comp *nuxtdocs.Component, sourceName string) *Document {
	var content strings.Builder
	fmt.Fprintf(&content, "# <%s />\n\n", comp.Name)
	fmt.Fprintf(&content, "%s\n", comp.Doc)
	if len(comp.Props) > 0 {
		content.WriteString("\n## Props\n")
		for _, p := range comp.Props {
			fmt.Fprintf(&content, "- `%s: %s`\n", p.Name, p.Type)
		}
	}

	metadata := map[string]interface{}{
		"name":     comp.Name,
		"category": comp.Category,
		"doc_url":  comp.DocURL,
		"doc_type": "nuxt-component",
	}

	body := content.String()
	return &Document{
		ID:        generateDocID(comp.DocURL),
		Source:    sourceName,
		Type:      "nuxt-component",
		Title:     comp.Name,
		Content:   body,
		URL:       comp.DocURL,
		Metadata:  metadata,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}

// composableToDocument renders a composable as markdown (heading, optional
// signature code fence, doc text, optional return type) and wraps it in a
// Document of type "nuxt-composable".
func (s *NuxtDocsScraper) composableToDocument(comp *nuxtdocs.Composable, sourceName string) *Document {
	var content strings.Builder
	fmt.Fprintf(&content, "# %s\n\n", comp.Name)
	if comp.Signature != "" {
		fmt.Fprintf(&content, "```javascript\n%s\n```\n\n", comp.Signature)
	}
	fmt.Fprintf(&content, "%s\n", comp.Doc)
	if comp.Returns != "" {
		fmt.Fprintf(&content, "\n**Returns:** `%s`\n", comp.Returns)
	}

	metadata := map[string]interface{}{
		"name":     comp.Name,
		"category": comp.Category,
		"doc_url":  comp.DocURL,
		"doc_type": "nuxt-composable",
	}

	body := content.String()
	return &Document{
		ID:        generateDocID(comp.DocURL),
		Source:    sourceName,
		Type:      "nuxt-composable",
		Title:     comp.Name,
		Content:   body,
		URL:       comp.DocURL,
		Metadata:  metadata,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}

// utilityToDocument renders a utility as markdown (heading, optional signature
// code fence, doc text) and wraps it in a Document of type "nuxt-utility".
func (s *NuxtDocsScraper) utilityToDocument(util *nuxtdocs.Utility, sourceName string) *Document {
	var content strings.Builder
	fmt.Fprintf(&content, "# %s\n\n", util.Name)
	if util.Signature != "" {
		fmt.Fprintf(&content, "```javascript\n%s\n```\n\n", util.Signature)
	}
	fmt.Fprintf(&content, "%s\n", util.Doc)

	metadata := map[string]interface{}{
		"name":     util.Name,
		"doc_url":  util.DocURL,
		"doc_type": "nuxt-utility",
	}

	body := content.String()
	return &Document{
		ID:        generateDocID(util.DocURL),
		Source:    sourceName,
		Type:      "nuxt-utility",
		Title:     util.Name,
		Content:   body,
		URL:       util.DocURL,
		Metadata:  metadata,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}

// configToDocument renders a config option as markdown (heading, optional type
// and default lines, doc text) and wraps it in a Document of type
// "nuxt-config".
func (s *NuxtDocsScraper) configToDocument(cfg *nuxtdocs.Config, sourceName string) *Document {
	var content strings.Builder
	fmt.Fprintf(&content, "# %s\n\n", cfg.Name)
	if cfg.Type != "" {
		fmt.Fprintf(&content, "Type: `%s`\n\n", cfg.Type)
	}
	if cfg.Default != "" {
		fmt.Fprintf(&content, "Default: `%s`\n\n", cfg.Default)
	}
	fmt.Fprintf(&content, "%s\n", cfg.Doc)

	metadata := map[string]interface{}{
		"name":     cfg.Name,
		"type":     cfg.Type,
		"default":  cfg.Default,
		"category": cfg.Category,
		"doc_url":  cfg.DocURL,
		"doc_type": "nuxt-config",
	}

	body := content.String()
	return &Document{
		ID:        generateDocID(cfg.DocURL),
		Source:    sourceName,
		Type:      "nuxt-config",
		Title:     cfg.Name,
		Content:   body,
		URL:       cfg.DocURL,
		Metadata:  metadata,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}

// commandToDocument renders a CLI command as markdown (heading, optional usage
// code fence, doc text, and a Flags list when it has flags) and wraps it in a
// Document of type "nuxt-command".
func (s *NuxtDocsScraper) commandToDocument(cmd *nuxtdocs.Command, sourceName string) *Document {
	var content strings.Builder
	fmt.Fprintf(&content, "# %s\n\n", cmd.Name)
	if cmd.Usage != "" {
		fmt.Fprintf(&content, "```\n%s\n```\n\n", cmd.Usage)
	}
	fmt.Fprintf(&content, "%s\n", cmd.Doc)
	if len(cmd.Flags) > 0 {
		content.WriteString("\n## Flags\n")
		for _, f := range cmd.Flags {
			fmt.Fprintf(&content, "- `--%s`: %s\n", f.Name, f.Doc)
		}
	}

	metadata := map[string]interface{}{
		"name":     cmd.Name,
		"usage":    cmd.Usage,
		"doc_url":  cmd.DocURL,
		"doc_type": "nuxt-command",
	}

	body := content.String()
	return &Document{
		ID:        generateDocID(cmd.DocURL),
		Source:    sourceName,
		Type:      "nuxt-command",
		Title:     cmd.Name,
		Content:   body,
		URL:       cmd.DocURL,
		Metadata:  metadata,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}