Files
Devour/internal/scraper/vuedocs.go
Tomas Dvorak 55885a0e8f first commit
2026-02-22 10:42:17 +01:00

245 lines
6.0 KiB
Go

package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/vuedocs"
)
// VueDocsScraper fetches a Vue API reference page over HTTP, parses it with
// the vuedocs parser and converts the parsed entries into Document values.
type VueDocsScraper struct {
// config supplies the User-Agent sent with each request and the timeout
// used when the HTTP client is constructed.
config *Config
// parser converts the fetched HTML into a parsed reference
// (see ParseReferencePage in Scrape).
parser *vuedocs.Parser
// client performs the HTTP requests issued by fetchPage.
client *http.Client
}
// NewVueDocsScraper builds a scraper around the given configuration. It
// creates a fresh vuedocs parser and an HTTP client whose timeout is taken
// from config.Timeout. config must be non-nil.
func NewVueDocsScraper(config *Config) *VueDocsScraper {
	scraper := new(VueDocsScraper)
	scraper.config = config
	scraper.parser = vuedocs.NewParser()
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads the page at source.URL, parses it as a Vue API reference
// and returns one summary document followed by one document per Composition
// API entry, directive, component and global API entry, in that order.
//
// It returns an error when source is nil or has an empty URL, when the page
// cannot be fetched, or when the parser rejects the page.
func (s *VueDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	// A nil source is treated like a missing URL instead of panicking.
	if source == nil || source.URL == "" {
		return nil, fmt.Errorf("URL is required for Vue docs scraper")
	}

	html, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	ref, err := s.parser.ParseReferencePage(html, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse reference: %w", err)
	}

	// The final length is known up front: one summary document plus one per
	// parsed entry, so allocate the backing array once.
	total := 1 + len(ref.Composition) + len(ref.Directives) + len(ref.Components) + len(ref.GlobalAPI)
	documents := make([]*Document, 0, total)

	documents = append(documents, s.referenceToDocument(ref, source.Name))
	for _, comp := range ref.Composition {
		documents = append(documents, s.compositionToDocument(comp, source.Name))
	}
	for _, dir := range ref.Directives {
		documents = append(documents, s.directiveToDocument(dir, source.Name))
	}
	for _, comp := range ref.Components {
		documents = append(documents, s.componentToDocument(comp, source.Name))
	}
	for _, api := range ref.GlobalAPI {
		documents = append(documents, s.globalAPIToDocument(api, source.Name))
	}
	return documents, nil
}
// DetectChanges fetches the page at source.URL, hashes the raw response body
// and reports whether that hash differs from lastHash. The freshly computed
// hash is returned alongside so the caller can store it for the next check.
// On a fetch failure it returns (false, "", err).
func (s *VueDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	page, fetchErr := s.fetchPage(ctx, source.URL)
	if fetchErr != nil {
		return false, "", fetchErr
	}

	current := s.generateHash(page)
	return current != lastHash, current, nil
}
// fetchPage performs a GET request for url using the scraper's HTTP client
// and the configured User-Agent, and returns the response body as a string.
//
// Any status other than 200 OK is reported as an error of the form
// "HTTP <code>". Request construction, transport and body-read errors are
// returned unchanged.
func (s *VueDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	// Upper bound on how much of an error response is read before giving up
	// on connection reuse.
	const maxDrainBytes = 64 << 10

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("User-Agent", s.config.UserAgent)

	resp, err := s.client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Read (a bounded amount of) the error body so the transport can
		// reuse the connection. The result is deliberately ignored: the
		// request has already failed and the status error is what matters.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, maxDrainBytes))
		return "", fmt.Errorf("HTTP %d", resp.StatusCode)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	return string(body), nil
}
// generateHash returns the SHA-256 digest of content as a lowercase
// hexadecimal string.
func (s *VueDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	// hash.Hash.Write never returns an error.
	hasher.Write([]byte(content))
	return hex.EncodeToString(hasher.Sum(nil))
}
// referenceToDocument builds the summary document for a parsed reference
// page. Its content is a fixed heading followed by one line with the number
// of entries in each collection of ref: Composition API, directives,
// components and global API.
//
// The document ID is derived from ref.DocURL, the hash is computed over the
// rendered content, and the timestamp is the time of conversion.
func (s *VueDocsScraper) referenceToDocument(ref *vuedocs.Reference, sourceName string) *Document {
	var content strings.Builder
	content.WriteString("# Vue API Reference\n\n")
	// Every collection that Scrape turns into documents is counted here, so
	// the summary (and therefore its hash) reflects changes in any of them.
	fmt.Fprintf(&content, "Composition API: %d, Directives: %d, Components: %d, Global API: %d\n",
		len(ref.Composition), len(ref.Directives), len(ref.Components), len(ref.GlobalAPI))

	// Render once; the same text is stored and hashed.
	body := content.String()
	return &Document{
		ID:        generateDocID(ref.DocURL),
		Source:    sourceName,
		Type:      "vue-reference",
		Title:     "Vue API Reference",
		Content:   body,
		URL:       ref.DocURL,
		Metadata:  map[string]interface{}{"doc_type": "vue-reference"},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// compositionToDocument converts one Composition API entry into a document.
// The content is a heading with the entry name, the signature in a fenced
// "javascript" block when one is present, and then the entry's doc text.
// Name, kind and doc URL are copied into the metadata.
func (s *VueDocsScraper) compositionToDocument(comp *vuedocs.Composition, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# %s\n\n", comp.Name)
	if sig := comp.Signature; sig != "" {
		fmt.Fprintf(&md, "```javascript\n%s\n```\n\n", sig)
	}
	fmt.Fprintf(&md, "%s\n", comp.Doc)

	doc := &Document{
		ID:      generateDocID(comp.DocURL),
		Source:  sourceName,
		Type:    "vue-composition",
		Title:   comp.Name,
		Content: md.String(),
		URL:     comp.DocURL,
		Metadata: map[string]interface{}{
			"name":     comp.Name,
			"kind":     comp.Kind,
			"doc_url":  comp.DocURL,
			"doc_type": "vue-composition",
		},
		Hash:      s.generateHash(md.String()),
		Timestamp: time.Now(),
	}
	return doc
}
// directiveToDocument converts one directive entry into a document whose
// content is a heading with the directive name followed by its doc text.
// Name and doc URL are copied into the metadata.
func (s *VueDocsScraper) directiveToDocument(dir *vuedocs.Directive, sourceName string) *Document {
	var md strings.Builder
	// Heading and description are emitted in a single formatting call.
	fmt.Fprintf(&md, "# %s\n\n%s\n", dir.Name, dir.Doc)
	text := md.String()

	return &Document{
		ID:      generateDocID(dir.DocURL),
		Source:  sourceName,
		Type:    "vue-directive",
		Title:   dir.Name,
		Content: text,
		URL:     dir.DocURL,
		Metadata: map[string]interface{}{
			"name":     dir.Name,
			"doc_url":  dir.DocURL,
			"doc_type": "vue-directive",
		},
		Hash:      s.generateHash(text),
		Timestamp: time.Now(),
	}
}
// componentToDocument converts one built-in component entry into a document.
// The content is a heading showing the component as a self-closing tag, the
// doc text, and, when the component has props, a "Props" section listing
// each prop as `name: type`. Name and doc URL are copied into the metadata.
func (s *VueDocsScraper) componentToDocument(comp *vuedocs.Component, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# <%s />\n\n%s\n", comp.Name, comp.Doc)

	// The section header is only written when there is something to list.
	if len(comp.Props) != 0 {
		md.WriteString("\n## Props\n")
		for _, prop := range comp.Props {
			fmt.Fprintf(&md, "- `%s: %s`\n", prop.Name, prop.Type)
		}
	}
	text := md.String()

	info := map[string]interface{}{
		"name":     comp.Name,
		"doc_url":  comp.DocURL,
		"doc_type": "vue-component",
	}
	return &Document{
		ID:        generateDocID(comp.DocURL),
		Source:    sourceName,
		Type:      "vue-component",
		Title:     comp.Name,
		Content:   text,
		URL:       comp.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(text),
		Timestamp: time.Now(),
	}
}
// globalAPIToDocument converts one global API entry into a document. The
// content is a heading with the API name, the signature in a fenced
// "javascript" block when one is present, and then the doc text. Name,
// category and doc URL are copied into the metadata.
func (s *VueDocsScraper) globalAPIToDocument(api *vuedocs.API, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# %s\n\n", api.Name)
	if api.Signature != "" {
		md.WriteString("```javascript\n")
		fmt.Fprintf(&md, "%s\n", api.Signature)
		md.WriteString("```\n\n")
	}
	fmt.Fprintf(&md, "%s\n", api.Doc)
	rendered := md.String()

	var doc Document
	doc.ID = generateDocID(api.DocURL)
	doc.Source = sourceName
	doc.Type = "vue-api"
	doc.Title = api.Name
	doc.Content = rendered
	doc.URL = api.DocURL
	doc.Metadata = map[string]interface{}{
		"name":     api.Name,
		"category": api.Category,
		"doc_url":  api.DocURL,
		"doc_type": "vue-api",
	}
	doc.Hash = s.generateHash(rendered)
	doc.Timestamp = time.Now()
	return &doc
}