This commit is contained in:
Tomas Dvorak
2026-02-22 15:41:27 +01:00
parent 0b88627e54
commit 409acd2e08
84 changed files with 65382 additions and 27475 deletions
+156
View File
@@ -0,0 +1,156 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/astrodocs"
)
// AstroDocsScraper fetches an Astro documentation page over HTTP and converts
// the parsed result into Document values.
type AstroDocsScraper struct {
// config supplies the HTTP timeout and the User-Agent sent with requests.
config *Config
// parser turns the fetched HTML into an astrodocs page structure.
parser *astrodocs.Parser
// client is the HTTP client used for every page fetch.
client *http.Client
}
// NewAstroDocsScraper builds an AstroDocsScraper from config.
//
// The scraper gets a fresh astrodocs parser and its own HTTP client whose
// timeout is config.Timeout. config is dereferenced here, so it must be
// non-nil.
func NewAstroDocsScraper(config *Config) *AstroDocsScraper {
	scraper := &AstroDocsScraper{config: config}
	scraper.parser = astrodocs.NewParser()
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads the page at source.URL, parses it, and returns one
// Document for the page itself followed by one Document per parsed section.
//
// It returns an error when source.URL is empty, when the page cannot be
// fetched, or when the parser rejects the HTML.
func (s *AstroDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Astro docs scraper")
	}

	rawHTML, fetchErr := s.fetchPage(ctx, source.URL)
	if fetchErr != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", fetchErr)
	}

	parsed, parseErr := s.parser.ParsePage(rawHTML, source.URL)
	if parseErr != nil {
		return nil, fmt.Errorf("failed to parse Astro docs page: %w", parseErr)
	}

	// The page-level document always comes first; sections follow in parse order.
	docs := []*Document{s.pageToDocument(parsed, source.Name)}
	for _, sec := range parsed.Sections {
		docs = append(docs, s.sectionToDocument(sec, parsed, source.Name))
	}
	return docs, nil
}
// DetectChanges reports whether the page at source.URL differs from the
// content that produced lastHash.
//
// It fetches the page and hashes the raw response body with generateHash. It
// returns whether that hash differs from lastHash, together with the new hash
// so the caller can keep it for the next comparison. On failure it returns
// (false, "", err).
//
// The hash covers the raw HTML rather than the parsed documents, so any
// byte-level change in the response is reported as a change.
func (s *AstroDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	// Same validation as Scrape, so an unset URL produces a clear error
	// instead of an opaque request failure.
	if source.URL == "" {
		return false, "", fmt.Errorf("URL is required for Astro docs scraper")
	}

	html, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		// Wrapped with the same prefix Scrape uses; %w keeps the cause inspectable.
		return false, "", fmt.Errorf("failed to fetch page: %w", err)
	}

	hash := s.generateHash(html)
	return hash != lastHash, hash, nil
}
// fetchPage performs a GET request for url and returns the response body as a
// string.
//
// The request is bound to ctx and carries the configured User-Agent when one
// is set. Any status other than 200 OK is reported as an error that names the
// URL; the body is not returned in that case.
func (s *AstroDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	// An explicitly empty User-Agent makes net/http omit the header entirely,
	// so only override the library default when a value is configured.
	if s.config.UserAgent != "" {
		req.Header.Set("User-Agent", s.config.UserAgent)
	}

	resp, err := s.client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Keeps the original "HTTP <code>" prefix and adds the URL so the
		// failing page can be identified from the error alone.
		return "", fmt.Errorf("HTTP %d fetching %s", resp.StatusCode, url)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading response body from %s: %w", url, err)
	}
	return string(body), nil
}
// generateHash returns the SHA-256 digest of content as a lowercase hex string.
func (s *AstroDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// pageToDocument renders a parsed page as a single markdown Document.
//
// The content is the title as a level-1 heading, the description, and, when
// the page has any, a "Code Examples" section with each code block fenced
// under its language. The Document hash is computed from that content.
func (s *AstroDocsScraper) pageToDocument(page *astrodocs.Page, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# %s\n\n%s\n", page.Title, page.Description)

	if len(page.CodeBlocks) > 0 {
		md.WriteString("\n## Code Examples\n")
		for _, block := range page.CodeBlocks {
			fmt.Fprintf(&md, "\n```%s\n%s\n```\n", block.Language, block.Code)
		}
	}

	body := md.String()
	return &Document{
		ID:      generateDocID(page.URL),
		Source:  sourceName,
		Type:    "astro-docs",
		Title:   page.Title,
		Content: body,
		URL:     page.URL,
		Metadata: map[string]interface{}{
			"title":    page.Title,
			"doc_url":  page.URL,
			"doc_type": "astro-docs",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// sectionToDocument renders one section of a page as its own Document.
//
// The content is the section title as a level-1 heading followed by the
// section text. The Document title combines the page and section titles, and
// the ID is derived from the section's own URL.
func (s *AstroDocsScraper) sectionToDocument(section *astrodocs.Section, page *astrodocs.Page, sourceName string) *Document {
	body := fmt.Sprintf("# %s\n\n%s\n", section.Title, section.Content)

	return &Document{
		ID:      generateDocID(section.DocURL),
		Source:  sourceName,
		Type:    "astro-section",
		Title:   fmt.Sprintf("%s - %s", page.Title, section.Title),
		Content: body,
		URL:     section.DocURL,
		Metadata: map[string]interface{}{
			"page_title": page.Title,
			"section_id": section.ID,
			"doc_url":    section.DocURL,
			"doc_type":   "astro-section",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
+193
View File
@@ -0,0 +1,193 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/cloudflaredocs"
)
// CloudflareDocsScraper fetches a Cloudflare documentation page over HTTP and
// converts the parsed result into Document values.
type CloudflareDocsScraper struct {
// config supplies the HTTP timeout and the User-Agent sent with requests.
config *Config
// parser turns the fetched HTML into a cloudflaredocs page structure.
parser *cloudflaredocs.Parser
// client is the HTTP client used for every page fetch.
client *http.Client
}
// NewCloudflareDocsScraper builds a CloudflareDocsScraper from config.
//
// The scraper gets a fresh cloudflaredocs parser and its own HTTP client whose
// timeout is config.Timeout. config is dereferenced here, so it must be
// non-nil.
func NewCloudflareDocsScraper(config *Config) *CloudflareDocsScraper {
	scraper := &CloudflareDocsScraper{config: config}
	scraper.parser = cloudflaredocs.NewParser()
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads the page at source.URL, parses it, and returns the page
// Document followed by one Document per section and then one per API entry.
//
// It returns an error when source.URL is empty, when the page cannot be
// fetched, or when the parser rejects the HTML.
func (s *CloudflareDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Cloudflare docs scraper")
	}

	rawHTML, fetchErr := s.fetchPage(ctx, source.URL)
	if fetchErr != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", fetchErr)
	}

	parsed, parseErr := s.parser.ParsePage(rawHTML, source.URL)
	if parseErr != nil {
		return nil, fmt.Errorf("failed to parse Cloudflare docs page: %w", parseErr)
	}

	// Output order: page, then sections, then API entries.
	docs := []*Document{s.pageToDocument(parsed, source.Name)}
	for _, sec := range parsed.Sections {
		docs = append(docs, s.sectionToDocument(sec, parsed, source.Name))
	}
	for _, endpoint := range parsed.APIs {
		docs = append(docs, s.apiToDocument(endpoint, parsed, source.Name))
	}
	return docs, nil
}
// DetectChanges reports whether the page at source.URL differs from the
// content that produced lastHash.
//
// It fetches the page and hashes the raw response body with generateHash. It
// returns whether that hash differs from lastHash, together with the new hash
// so the caller can keep it for the next comparison. On failure it returns
// (false, "", err).
//
// The hash covers the raw HTML rather than the parsed documents, so any
// byte-level change in the response is reported as a change.
func (s *CloudflareDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	// Same validation as Scrape, so an unset URL produces a clear error
	// instead of an opaque request failure.
	if source.URL == "" {
		return false, "", fmt.Errorf("URL is required for Cloudflare docs scraper")
	}

	html, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		// Wrapped with the same prefix Scrape uses; %w keeps the cause inspectable.
		return false, "", fmt.Errorf("failed to fetch page: %w", err)
	}

	hash := s.generateHash(html)
	return hash != lastHash, hash, nil
}
// fetchPage performs a GET request for url and returns the response body as a
// string.
//
// The request is bound to ctx and carries the configured User-Agent when one
// is set. Any status other than 200 OK is reported as an error that names the
// URL; the body is not returned in that case.
func (s *CloudflareDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	// An explicitly empty User-Agent makes net/http omit the header entirely,
	// so only override the library default when a value is configured.
	if s.config.UserAgent != "" {
		req.Header.Set("User-Agent", s.config.UserAgent)
	}

	resp, err := s.client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Keeps the original "HTTP <code>" prefix and adds the URL so the
		// failing page can be identified from the error alone.
		return "", fmt.Errorf("HTTP %d fetching %s", resp.StatusCode, url)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading response body from %s: %w", url, err)
	}
	return string(body), nil
}
// generateHash returns the SHA-256 digest of content as a lowercase hex string.
func (s *CloudflareDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// pageToDocument renders a parsed page as a single markdown Document.
//
// The content is the title as a level-1 heading, an optional "Product:" line
// (only when the page names a product), the description, and, when the page
// has any, a "Code Examples" section with each code block fenced under its
// language. The Document hash is computed from that content.
func (s *CloudflareDocsScraper) pageToDocument(page *cloudflaredocs.Page, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# %s\n\n", page.Title)
	if page.Product != "" {
		fmt.Fprintf(&md, "Product: %s\n\n", page.Product)
	}
	fmt.Fprintf(&md, "%s\n", page.Description)

	if len(page.CodeBlocks) > 0 {
		md.WriteString("\n## Code Examples\n")
		for _, block := range page.CodeBlocks {
			fmt.Fprintf(&md, "\n```%s\n%s\n```\n", block.Language, block.Code)
		}
	}

	body := md.String()
	return &Document{
		ID:      generateDocID(page.URL),
		Source:  sourceName,
		Type:    "cloudflare-docs",
		Title:   page.Title,
		Content: body,
		URL:     page.URL,
		Metadata: map[string]interface{}{
			"title":    page.Title,
			"product":  page.Product,
			"doc_url":  page.URL,
			"doc_type": "cloudflare-docs",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// sectionToDocument renders one section of a page as its own Document.
//
// The content is the section title as a level-1 heading followed by the
// section text. The Document title combines the page and section titles, and
// the metadata carries the owning page's title and product.
func (s *CloudflareDocsScraper) sectionToDocument(section *cloudflaredocs.Section, page *cloudflaredocs.Page, sourceName string) *Document {
	body := fmt.Sprintf("# %s\n\n%s\n", section.Title, section.Content)

	return &Document{
		ID:      generateDocID(section.DocURL),
		Source:  sourceName,
		Type:    "cloudflare-section",
		Title:   fmt.Sprintf("%s - %s", page.Title, section.Title),
		Content: body,
		URL:     section.DocURL,
		Metadata: map[string]interface{}{
			"page_title": page.Title,
			"product":    page.Product,
			"section_id": section.ID,
			"doc_url":    section.DocURL,
			"doc_type":   "cloudflare-section",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// apiToDocument renders one API entry of a page as its own Document.
//
// The heading and the Document title are both "<method> <endpoint>". The ID
// is derived from the entry's doc URL plus "#" and the endpoint, while the
// Document URL is the doc URL alone.
func (s *CloudflareDocsScraper) apiToDocument(api *cloudflaredocs.API, page *cloudflaredocs.Page, sourceName string) *Document {
	body := fmt.Sprintf("# %s %s\n\n%s\n", api.Method, api.Endpoint, api.Description)

	return &Document{
		ID:      generateDocID(api.DocURL + "#" + api.Endpoint),
		Source:  sourceName,
		Type:    "cloudflare-api",
		Title:   fmt.Sprintf("%s %s", api.Method, api.Endpoint),
		Content: body,
		URL:     api.DocURL,
		Metadata: map[string]interface{}{
			"page_title": page.Title,
			"product":    page.Product,
			"method":     api.Method,
			"endpoint":   api.Endpoint,
			"doc_url":    api.DocURL,
			"doc_type":   "cloudflare-api",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
+156
View File
@@ -0,0 +1,156 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/dockerdocs"
)
// DockerDocsScraper fetches a Docker documentation page over HTTP and converts
// the parsed result into Document values.
type DockerDocsScraper struct {
// config supplies the HTTP timeout and the User-Agent sent with requests.
config *Config
// parser turns the fetched HTML into a dockerdocs page structure.
parser *dockerdocs.Parser
// client is the HTTP client used for every page fetch.
client *http.Client
}
// NewDockerDocsScraper builds a DockerDocsScraper from config.
//
// The scraper gets a fresh dockerdocs parser and its own HTTP client whose
// timeout is config.Timeout. config is dereferenced here, so it must be
// non-nil.
func NewDockerDocsScraper(config *Config) *DockerDocsScraper {
	scraper := &DockerDocsScraper{config: config}
	scraper.parser = dockerdocs.NewParser()
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads the page at source.URL, parses it, and returns one
// Document for the page itself followed by one Document per parsed section.
//
// It returns an error when source.URL is empty, when the page cannot be
// fetched, or when the parser rejects the HTML.
func (s *DockerDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Docker docs scraper")
	}

	rawHTML, fetchErr := s.fetchPage(ctx, source.URL)
	if fetchErr != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", fetchErr)
	}

	parsed, parseErr := s.parser.ParsePage(rawHTML, source.URL)
	if parseErr != nil {
		return nil, fmt.Errorf("failed to parse Docker docs page: %w", parseErr)
	}

	// The page-level document always comes first; sections follow in parse order.
	docs := []*Document{s.pageToDocument(parsed, source.Name)}
	for _, sec := range parsed.Sections {
		docs = append(docs, s.sectionToDocument(sec, parsed, source.Name))
	}
	return docs, nil
}
// DetectChanges reports whether the page at source.URL differs from the
// content that produced lastHash.
//
// It fetches the page and hashes the raw response body with generateHash. It
// returns whether that hash differs from lastHash, together with the new hash
// so the caller can keep it for the next comparison. On failure it returns
// (false, "", err).
//
// The hash covers the raw HTML rather than the parsed documents, so any
// byte-level change in the response is reported as a change.
func (s *DockerDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	// Same validation as Scrape, so an unset URL produces a clear error
	// instead of an opaque request failure.
	if source.URL == "" {
		return false, "", fmt.Errorf("URL is required for Docker docs scraper")
	}

	html, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		// Wrapped with the same prefix Scrape uses; %w keeps the cause inspectable.
		return false, "", fmt.Errorf("failed to fetch page: %w", err)
	}

	hash := s.generateHash(html)
	return hash != lastHash, hash, nil
}
// fetchPage performs a GET request for url and returns the response body as a
// string.
//
// The request is bound to ctx and carries the configured User-Agent when one
// is set. Any status other than 200 OK is reported as an error that names the
// URL; the body is not returned in that case.
func (s *DockerDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	// An explicitly empty User-Agent makes net/http omit the header entirely,
	// so only override the library default when a value is configured.
	if s.config.UserAgent != "" {
		req.Header.Set("User-Agent", s.config.UserAgent)
	}

	resp, err := s.client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Keeps the original "HTTP <code>" prefix and adds the URL so the
		// failing page can be identified from the error alone.
		return "", fmt.Errorf("HTTP %d fetching %s", resp.StatusCode, url)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading response body from %s: %w", url, err)
	}
	return string(body), nil
}
// generateHash returns the SHA-256 digest of content as a lowercase hex string.
func (s *DockerDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// pageToDocument renders a parsed page as a single markdown Document.
//
// The content is the title as a level-1 heading, the description, and, when
// the page has any, a "Code Examples" section with each code block fenced
// under its language. The Document hash is computed from that content.
func (s *DockerDocsScraper) pageToDocument(page *dockerdocs.Page, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# %s\n\n%s\n", page.Title, page.Description)

	if len(page.CodeBlocks) > 0 {
		md.WriteString("\n## Code Examples\n")
		for _, block := range page.CodeBlocks {
			fmt.Fprintf(&md, "\n```%s\n%s\n```\n", block.Language, block.Code)
		}
	}

	body := md.String()
	return &Document{
		ID:      generateDocID(page.URL),
		Source:  sourceName,
		Type:    "docker-docs",
		Title:   page.Title,
		Content: body,
		URL:     page.URL,
		Metadata: map[string]interface{}{
			"title":    page.Title,
			"doc_url":  page.URL,
			"doc_type": "docker-docs",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// sectionToDocument renders one section of a page as its own Document.
//
// The content is the section title as a level-1 heading followed by the
// section text. The Document title combines the page and section titles, and
// the ID is derived from the section's own URL.
func (s *DockerDocsScraper) sectionToDocument(section *dockerdocs.Section, page *dockerdocs.Page, sourceName string) *Document {
	body := fmt.Sprintf("# %s\n\n%s\n", section.Title, section.Content)

	return &Document{
		ID:      generateDocID(section.DocURL),
		Source:  sourceName,
		Type:    "docker-section",
		Title:   fmt.Sprintf("%s - %s", page.Title, section.Title),
		Content: body,
		URL:     section.DocURL,
		Metadata: map[string]interface{}{
			"page_title": page.Title,
			"section_id": section.ID,
			"doc_url":    section.DocURL,
			"doc_type":   "docker-section",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
+423
View File
@@ -0,0 +1,423 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/godocs"
)
// GoDocsScraper fetches a Go package documentation page over HTTP and converts
// the parsed package into Document values.
type GoDocsScraper struct {
// config supplies the HTTP timeout and the User-Agent sent with requests.
config *Config
// parser turns the fetched HTML into a godocs package structure.
parser *godocs.Parser
// client is the HTTP client used for every page fetch.
client *http.Client
}
// NewGoDocsScraper builds a GoDocsScraper from config.
//
// The scraper gets a fresh godocs parser and its own HTTP client whose
// timeout is config.Timeout. config is dereferenced here, so it must be
// non-nil.
func NewGoDocsScraper(config *Config) *GoDocsScraper {
	scraper := &GoDocsScraper{config: config}
	scraper.parser = godocs.NewParser()
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads the package page at source.URL, parses it, and returns a
// flat list of Documents.
//
// The order is: the package Document, every function, every type (each
// immediately followed by its methods), every constant, then every variable.
// It returns an error when source.URL is empty, when the page cannot be
// fetched, or when the parser rejects the HTML.
func (s *GoDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Go docs scraper")
	}

	rawHTML, fetchErr := s.fetchPage(ctx, source.URL)
	if fetchErr != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", fetchErr)
	}

	parsed, parseErr := s.parser.ParsePackagePage(rawHTML, source.URL)
	if parseErr != nil {
		return nil, fmt.Errorf("failed to parse package: %w", parseErr)
	}

	docs := []*Document{s.packageToDocument(parsed, source.Name)}

	for _, fn := range parsed.Functions {
		docs = append(docs, s.functionToDocument(fn, parsed, source.Name))
	}

	for _, typ := range parsed.Types {
		docs = append(docs, s.typeToDocument(typ, parsed, source.Name))
		// Methods are emitted right after the type they belong to.
		for _, method := range typ.Methods {
			docs = append(docs, s.methodToDocument(method, typ, parsed, source.Name))
		}
	}

	for _, constant := range parsed.Constants {
		docs = append(docs, s.constantToDocument(constant, parsed, source.Name))
	}

	for _, variable := range parsed.Variables {
		docs = append(docs, s.variableToDocument(variable, parsed, source.Name))
	}

	return docs, nil
}
// DetectChanges reports whether the page at source.URL differs from the
// content that produced lastHash.
//
// It fetches the page and hashes the raw response body with generateHash. It
// returns whether that hash differs from lastHash, together with the new hash
// so the caller can keep it for the next comparison. On failure it returns
// (false, "", err).
//
// The hash covers the raw HTML rather than the parsed documents, so any
// byte-level change in the response is reported as a change.
func (s *GoDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	// Same validation as Scrape, so an unset URL produces a clear error
	// instead of an opaque request failure.
	if source.URL == "" {
		return false, "", fmt.Errorf("URL is required for Go docs scraper")
	}

	html, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		// Wrapped with the same prefix Scrape uses; %w keeps the cause inspectable.
		return false, "", fmt.Errorf("failed to fetch page: %w", err)
	}

	hash := s.generateHash(html)
	return hash != lastHash, hash, nil
}
// fetchPage performs a GET request for url and returns the response body as a
// string.
//
// The request is bound to ctx and carries the configured User-Agent when one
// is set. Any status other than 200 OK is reported as an error that names the
// URL; the body is not returned in that case.
func (s *GoDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	// An explicitly empty User-Agent makes net/http omit the header entirely,
	// so only override the library default when a value is configured.
	if s.config.UserAgent != "" {
		req.Header.Set("User-Agent", s.config.UserAgent)
	}

	resp, err := s.client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Keeps the original "HTTP <code>" prefix and adds the URL so the
		// failing page can be identified from the error alone.
		return "", fmt.Errorf("HTTP %d fetching %s", resp.StatusCode, url)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading response body from %s: %w", url, err)
	}
	return string(body), nil
}
// generateHash returns the SHA-256 digest of content as a lowercase hex string.
func (s *GoDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// packageToDocument builds the package-level Document.
//
// The content comes from buildPackageContent. Metadata always carries the
// import path, version, imported-by value, repository and doc URL; module
// path/version are added only when the package has module information, and
// license names only when at least one license is listed.
func (s *GoDocsScraper) packageToDocument(pkg *godocs.Package, sourceName string) *Document {
	body := s.buildPackageContent(pkg)

	meta := map[string]interface{}{
		"import_path": pkg.ImportPath,
		"version":     pkg.Version,
		"imported_by": pkg.ImportedBy,
		"repository":  pkg.Repository,
		"doc_url":     pkg.DocURL,
	}

	if mod := pkg.Module; mod != nil {
		meta["module_path"] = mod.Path
		meta["module_version"] = mod.Version
	}

	if len(pkg.Licenses) > 0 {
		names := make([]string, 0, len(pkg.Licenses))
		for _, lic := range pkg.Licenses {
			names = append(names, lic.Name)
		}
		meta["licenses"] = names
	}

	return &Document{
		ID:        generateDocID(pkg.DocURL),
		Source:    sourceName,
		Type:      "go-package",
		Title:     fmt.Sprintf("%s - %s", pkg.Name, pkg.ImportPath),
		Content:   body,
		URL:       pkg.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// buildPackageContent renders the markdown overview of a package.
//
// It emits the package heading, the synopsis and documentation when present,
// a bulleted list of function signatures, a bulleted list of types with
// their kind, and count-only headings for constants and variables. The
// pieces are joined with single newlines.
func (s *GoDocsScraper) buildPackageContent(pkg *godocs.Package) string {
	lines := []string{fmt.Sprintf("# Package %s\n", pkg.ImportPath)}

	if pkg.Synopsis != "" {
		lines = append(lines, pkg.Synopsis)
	}
	if pkg.Doc != "" {
		lines = append(lines, "\n## Documentation\n", pkg.Doc)
	}

	if count := len(pkg.Functions); count > 0 {
		lines = append(lines, fmt.Sprintf("\n## Functions (%d)\n", count))
		for _, fn := range pkg.Functions {
			lines = append(lines, fmt.Sprintf("- `%s`", fn.Signature))
		}
	}

	if count := len(pkg.Types); count > 0 {
		lines = append(lines, fmt.Sprintf("\n## Types (%d)\n", count))
		for _, typ := range pkg.Types {
			lines = append(lines, fmt.Sprintf("- `%s` (%s)", typ.Name, typ.Kind))
		}
	}

	// Constants and variables only get a heading with their count.
	if count := len(pkg.Constants); count > 0 {
		lines = append(lines, fmt.Sprintf("\n## Constants (%d)\n", count))
	}
	if count := len(pkg.Variables); count > 0 {
		lines = append(lines, fmt.Sprintf("\n## Variables (%d)\n", count))
	}

	return strings.Join(lines, "\n")
}
// functionToDocument builds the Document for a package-level function.
//
// The content comes from buildFunctionContent. The Document URL, and the
// input to generateDocID, is the package doc URL with "#<function name>"
// appended. Metadata carries the import path, package name, symbol,
// signature, kind "function", and the function's examples serialized as a
// JSON string under "examples".
func (s *GoDocsScraper) functionToDocument(fn *godocs.Function, pkg *godocs.Package, sourceName string) *Document {
	content := s.buildFunctionContent(fn, pkg)
	// The ID and the URL are both derived from the same anchored address.
	anchor := fmt.Sprintf("%s#%s", pkg.DocURL, fn.Name)

	metadata := map[string]interface{}{
		"import_path": pkg.ImportPath,
		"package":     pkg.Name,
		"symbol":      fn.Name,
		"signature":   fn.Signature,
		"kind":        "function",
	}

	examplesJSON, err := json.Marshal(fn.Examples)
	if err != nil {
		// Do not drop the failure silently: keep "examples" valid JSON
		// ("null", the same value a nil example list marshals to) and record
		// why the real data is missing.
		examplesJSON = []byte("null")
		metadata["examples_error"] = err.Error()
	}
	metadata["examples"] = string(examplesJSON)

	return &Document{
		ID:        generateDocID(anchor),
		Source:    sourceName,
		Type:      "go-function",
		Title:     fmt.Sprintf("%s.%s", pkg.Name, fn.Name),
		Content:   content,
		URL:       anchor,
		Metadata:  metadata,
		Hash:      s.generateHash(content),
		Timestamp: time.Now(),
	}
}
// buildFunctionContent renders the markdown body for a function.
//
// It emits a "<package>.<function>" heading, the signature in a fenced block,
// the doc text when present, and then one "Example" subsection per example
// with its optional description, its code, and its optional output. The
// pieces are joined with single newlines.
func (s *GoDocsScraper) buildFunctionContent(fn *godocs.Function, pkg *godocs.Package) string {
	lines := []string{
		fmt.Sprintf("# %s.%s\n", pkg.Name, fn.Name),
		fmt.Sprintf("```\n%s\n```", fn.Signature),
	}

	if fn.Doc != "" {
		lines = append(lines, "\n"+fn.Doc)
	}

	for _, example := range fn.Examples {
		lines = append(lines, fmt.Sprintf("\n### Example: %s\n", example.Name))
		if example.Doc != "" {
			lines = append(lines, example.Doc)
		}
		lines = append(lines, fmt.Sprintf("```go\n%s\n```", example.Code))
		if example.Output != "" {
			lines = append(lines, fmt.Sprintf("Output:\n```\n%s\n```", example.Output))
		}
	}

	return strings.Join(lines, "\n")
}
// typeToDocument builds the Document for a package-level type.
//
// The content comes from buildTypeContent. The Document URL, and the input
// to generateDocID, is the package doc URL with "#<type name>" appended.
// Metadata carries the import path, package name, symbol, kind "type", the
// type's own kind and underlying definition, its method count, and its
// fields serialized as a JSON string under "fields".
func (s *GoDocsScraper) typeToDocument(t *godocs.Type, pkg *godocs.Package, sourceName string) *Document {
	content := s.buildTypeContent(t, pkg)
	// The ID and the URL are both derived from the same anchored address.
	anchor := fmt.Sprintf("%s#%s", pkg.DocURL, t.Name)

	metadata := map[string]interface{}{
		"import_path":  pkg.ImportPath,
		"package":      pkg.Name,
		"symbol":       t.Name,
		"kind":         "type",
		"type_kind":    t.Kind,
		"underlying":   t.Underlying,
		"method_count": len(t.Methods),
	}

	fieldsJSON, err := json.Marshal(t.Fields)
	if err != nil {
		// Do not drop the failure silently: keep "fields" valid JSON
		// ("null", the same value a nil field list marshals to) and record
		// why the real data is missing.
		fieldsJSON = []byte("null")
		metadata["fields_error"] = err.Error()
	}
	metadata["fields"] = string(fieldsJSON)

	return &Document{
		ID:        generateDocID(anchor),
		Source:    sourceName,
		Type:      "go-type",
		Title:     fmt.Sprintf("%s.%s", pkg.Name, t.Name),
		Content:   content,
		URL:       anchor,
		Metadata:  metadata,
		Hash:      s.generateHash(content),
		Timestamp: time.Now(),
	}
}
// buildTypeContent renders the markdown body for a type.
//
// It emits a "type <package>.<name>" heading, the underlying definition in a
// fenced block, the doc text when present, a "Fields" list (each entry
// followed by " - <doc>" when the field is documented), and a "Methods" list
// of signatures. The pieces are joined with single newlines.
func (s *GoDocsScraper) buildTypeContent(t *godocs.Type, pkg *godocs.Package) string {
	lines := []string{
		fmt.Sprintf("# type %s.%s\n", pkg.Name, t.Name),
		fmt.Sprintf("```\n%s\n```", t.Underlying),
	}

	if t.Doc != "" {
		lines = append(lines, "\n"+t.Doc)
	}

	if len(t.Fields) > 0 {
		lines = append(lines, "\n### Fields\n")
		for _, field := range t.Fields {
			entry := fmt.Sprintf("- `%s %s`", field.Name, field.Type)
			if field.Doc != "" {
				entry = fmt.Sprintf("%s - %s", entry, field.Doc)
			}
			lines = append(lines, entry)
		}
	}

	if count := len(t.Methods); count > 0 {
		lines = append(lines, fmt.Sprintf("\n### Methods (%d)\n", count))
		for _, method := range t.Methods {
			lines = append(lines, fmt.Sprintf("- `%s`", method.Signature))
		}
	}

	return strings.Join(lines, "\n")
}
// methodToDocument builds the Document for a method of a type.
//
// The content comes from buildMethodContent. The Document URL, and the input
// to generateDocID, is the package doc URL with "#<type>.<method>" appended.
func (s *GoDocsScraper) methodToDocument(m *godocs.Method, t *godocs.Type, pkg *godocs.Package, sourceName string) *Document {
	body := s.buildMethodContent(m, t, pkg)
	// The ID and the URL are both derived from the same anchored address.
	anchor := fmt.Sprintf("%s#%s.%s", pkg.DocURL, t.Name, m.Name)

	return &Document{
		ID:      generateDocID(anchor),
		Source:  sourceName,
		Type:    "go-method",
		Title:   fmt.Sprintf("%s.%s.%s", pkg.Name, t.Name, m.Name),
		Content: body,
		URL:     anchor,
		Metadata: map[string]interface{}{
			"import_path": pkg.ImportPath,
			"package":     pkg.Name,
			"type":        t.Name,
			"symbol":      m.Name,
			"receiver":    m.Receiver,
			"signature":   m.Signature,
			"kind":        "method",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// buildMethodContent renders the markdown body for a method: a
// "func (<receiver>) <name>" heading, the signature in a fenced block, and
// the doc text when present, joined with single newlines. The t and pkg
// arguments are accepted but not used in the output.
func (s *GoDocsScraper) buildMethodContent(m *godocs.Method, t *godocs.Type, pkg *godocs.Package) string {
	lines := []string{
		fmt.Sprintf("# func (%s) %s\n", m.Receiver, m.Name),
		fmt.Sprintf("```\n%s\n```", m.Signature),
	}
	if m.Doc != "" {
		lines = append(lines, "\n"+m.Doc)
	}
	return strings.Join(lines, "\n")
}
// constantToDocument builds the Document for a constant declaration.
//
// The content is a "Constants" heading, the doc text when present, and a Go
// code fence. A declaration with more than one name is rendered as a
// "const ( ... )" group listing the names; otherwise it is rendered as a
// single "const <name>" line, with " = <value>" appended only when a value is
// recorded. The Document URL is the package doc URL; the ID is derived from
// that URL plus "#const-<name>".
func (s *GoDocsScraper) constantToDocument(c *godocs.Value, pkg *godocs.Package, sourceName string) *Document {
	var content strings.Builder
	content.WriteString("# Constants\n\n")
	if c.Doc != "" {
		fmt.Fprintf(&content, "%s\n\n", c.Doc)
	}

	if len(c.Names) > 1 {
		content.WriteString("```go\nconst (\n")
		for _, name := range c.Names {
			fmt.Fprintf(&content, "\t%s\n", name)
		}
		content.WriteString(")\n```")
	} else {
		fmt.Fprintf(&content, "```go\nconst %s", c.Name)
		// Skip the assignment when no value is recorded, so the snippet does
		// not end in a dangling "const X = ".
		if c.Value != "" {
			fmt.Fprintf(&content, " = %s", c.Value)
		}
		content.WriteString("\n```")
	}

	body := content.String()
	metadata := map[string]interface{}{
		"import_path": pkg.ImportPath,
		"package":     pkg.Name,
		"names":       c.Names,
		"kind":        "constant",
	}

	return &Document{
		ID:        generateDocID(fmt.Sprintf("%s#const-%s", pkg.DocURL, c.Name)),
		Source:    sourceName,
		Type:      "go-constant",
		Title:     fmt.Sprintf("%s.%s (const)", pkg.Name, c.Name),
		Content:   body,
		URL:       pkg.DocURL,
		Metadata:  metadata,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// variableToDocument builds the Document for a package-level variable.
//
// The content is a "Variables" heading, the doc text when present, and a Go
// code fence holding "var <name>", followed by the type and " = <value>" when
// each is recorded. The Document URL is the package doc URL; the ID is
// derived from that URL plus "#var-<name>".
func (s *GoDocsScraper) variableToDocument(v *godocs.Value, pkg *godocs.Package, sourceName string) *Document {
	var md strings.Builder
	md.WriteString("# Variables\n\n")
	if v.Doc != "" {
		fmt.Fprintf(&md, "%s\n\n", v.Doc)
	}

	// Assemble the declaration piece by piece; type and value are optional.
	fmt.Fprintf(&md, "```go\nvar %s", v.Name)
	if v.Type != "" {
		fmt.Fprintf(&md, " %s", v.Type)
	}
	if v.Value != "" {
		fmt.Fprintf(&md, " = %s", v.Value)
	}
	md.WriteString("\n```")

	body := md.String()
	return &Document{
		ID:      generateDocID(fmt.Sprintf("%s#var-%s", pkg.DocURL, v.Name)),
		Source:  sourceName,
		Type:    "go-variable",
		Title:   fmt.Sprintf("%s.%s (var)", pkg.Name, v.Name),
		Content: body,
		URL:     pkg.DocURL,
		Metadata: map[string]interface{}{
			"import_path": pkg.ImportPath,
			"package":     pkg.Name,
			"name":        v.Name,
			"type":        v.Type,
			"kind":        "variable",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
+254
View File
@@ -0,0 +1,254 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/javadocs"
)
// JavaDocsScraper fetches a Java package documentation page over HTTP and
// converts the parsed package into Document values.
type JavaDocsScraper struct {
// config supplies the HTTP timeout and the User-Agent sent with requests.
config *Config
// parser turns the fetched HTML into a javadocs package structure.
parser *javadocs.Parser
// client is the HTTP client used for every page fetch.
client *http.Client
}
// NewJavaDocsScraper builds a JavaDocsScraper from config.
//
// The scraper gets a fresh javadocs parser and its own HTTP client whose
// timeout is config.Timeout. config is dereferenced here, so it must be
// non-nil.
func NewJavaDocsScraper(config *Config) *JavaDocsScraper {
	scraper := &JavaDocsScraper{config: config}
	scraper.parser = javadocs.NewParser()
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads the package page at source.URL, parses it, and returns a
// flat list of Documents.
//
// The order is: the package Document, then classes, interfaces, enums and
// exceptions. It returns an error when source.URL is empty, when the page
// cannot be fetched, or when the parser rejects the HTML.
func (s *JavaDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Java docs scraper")
	}

	rawHTML, fetchErr := s.fetchPage(ctx, source.URL)
	if fetchErr != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", fetchErr)
	}

	parsed, parseErr := s.parser.ParsePackagePage(rawHTML, source.URL)
	if parseErr != nil {
		return nil, fmt.Errorf("failed to parse package: %w", parseErr)
	}

	docs := []*Document{s.packageToDocument(parsed, source.Name)}

	for _, cls := range parsed.Classes {
		docs = append(docs, s.classToDocument(cls, parsed, source.Name))
	}
	for _, iface := range parsed.Interfaces {
		docs = append(docs, s.interfaceToDocument(iface, parsed, source.Name))
	}
	for _, enum := range parsed.Enums {
		docs = append(docs, s.enumToDocument(enum, parsed, source.Name))
	}
	for _, exc := range parsed.Exceptions {
		docs = append(docs, s.exceptionToDocument(exc, parsed, source.Name))
	}

	return docs, nil
}
// DetectChanges reports whether the page at source.URL differs from the
// content that produced lastHash.
//
// It fetches the page and hashes the raw response body with generateHash. It
// returns whether that hash differs from lastHash, together with the new hash
// so the caller can keep it for the next comparison. On failure it returns
// (false, "", err).
//
// The hash covers the raw HTML rather than the parsed documents, so any
// byte-level change in the response is reported as a change.
func (s *JavaDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	// Same validation as Scrape, so an unset URL produces a clear error
	// instead of an opaque request failure.
	if source.URL == "" {
		return false, "", fmt.Errorf("URL is required for Java docs scraper")
	}

	html, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		// Wrapped with the same prefix Scrape uses; %w keeps the cause inspectable.
		return false, "", fmt.Errorf("failed to fetch page: %w", err)
	}

	hash := s.generateHash(html)
	return hash != lastHash, hash, nil
}
// fetchPage performs a GET request for url and returns the response body as a
// string.
//
// The request is bound to ctx and carries the configured User-Agent when one
// is set. Any status other than 200 OK is reported as an error that names the
// URL; the body is not returned in that case.
func (s *JavaDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	// An explicitly empty User-Agent makes net/http omit the header entirely,
	// so only override the library default when a value is configured.
	if s.config.UserAgent != "" {
		req.Header.Set("User-Agent", s.config.UserAgent)
	}

	resp, err := s.client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Keeps the original "HTTP <code>" prefix and adds the URL so the
		// failing page can be identified from the error alone.
		return "", fmt.Errorf("HTTP %d fetching %s", resp.StatusCode, url)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading response body from %s: %w", url, err)
	}
	return string(body), nil
}
// generateHash returns the SHA-256 digest of content as a lowercase hex string.
func (s *JavaDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// packageToDocument builds the package-level Document: a "Package <name>"
// heading followed by the package doc text. The package name is also used
// as the Document title.
func (s *JavaDocsScraper) packageToDocument(pkg *javadocs.Package, sourceName string) *Document {
	body := fmt.Sprintf("# Package %s\n\n%s\n", pkg.Name, pkg.Doc)

	return &Document{
		ID:      generateDocID(pkg.DocURL),
		Source:  sourceName,
		Type:    "java-package",
		Title:   pkg.Name,
		Content: body,
		URL:     pkg.DocURL,
		Metadata: map[string]interface{}{
			"package":  pkg.Name,
			"doc_url":  pkg.DocURL,
			"doc_type": "java-package",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// classToDocument builds the Document for a class.
//
// The content is the qualified name as a heading, a "Kind:" line, the class
// doc text, and, when the class has methods, a "Methods" list of their
// signatures. The metadata stores the kind converted to a plain string.
func (s *JavaDocsScraper) classToDocument(class *javadocs.Class, pkg *javadocs.Package, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# %s\n\nKind: %s\n\n%s\n", class.QualifiedName, class.Kind, class.Doc)

	if len(class.Methods) > 0 {
		md.WriteString("\n## Methods\n")
		for _, method := range class.Methods {
			fmt.Fprintf(&md, "- `%s`\n", method.Signature)
		}
	}

	body := md.String()
	return &Document{
		ID:      generateDocID(class.DocURL),
		Source:  sourceName,
		Type:    "java-class",
		Title:   class.QualifiedName,
		Content: body,
		URL:     class.DocURL,
		Metadata: map[string]interface{}{
			"package":        pkg.Name,
			"qualified_name": class.QualifiedName,
			"kind":           string(class.Kind),
			"doc_url":        class.DocURL,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// interfaceToDocument builds the Document for an interface: the qualified
// name tagged "(interface)" as a heading, followed by the doc text. The
// metadata kind is fixed to "interface".
func (s *JavaDocsScraper) interfaceToDocument(iface *javadocs.Class, pkg *javadocs.Package, sourceName string) *Document {
	body := fmt.Sprintf("# %s (interface)\n\n%s\n", iface.QualifiedName, iface.Doc)

	return &Document{
		ID:      generateDocID(iface.DocURL),
		Source:  sourceName,
		Type:    "java-interface",
		Title:   iface.QualifiedName,
		Content: body,
		URL:     iface.DocURL,
		Metadata: map[string]interface{}{
			"package":        pkg.Name,
			"qualified_name": iface.QualifiedName,
			"kind":           "interface",
			"doc_url":        iface.DocURL,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// enumToDocument builds the Document for an enum: the qualified name tagged
// "(enum)" as a heading, the doc text, and, when the enum has constants, a
// "Constants" list of their names. The metadata kind is fixed to "enum".
func (s *JavaDocsScraper) enumToDocument(enum *javadocs.Enum, pkg *javadocs.Package, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# %s (enum)\n\n%s\n", enum.QualifiedName, enum.Doc)

	if len(enum.Constants) > 0 {
		md.WriteString("\n## Constants\n")
		for _, constant := range enum.Constants {
			fmt.Fprintf(&md, "- `%s`\n", constant.Name)
		}
	}

	body := md.String()
	return &Document{
		ID:      generateDocID(enum.DocURL),
		Source:  sourceName,
		Type:    "java-enum",
		Title:   enum.QualifiedName,
		Content: body,
		URL:     enum.DocURL,
		Metadata: map[string]interface{}{
			"package":        pkg.Name,
			"qualified_name": enum.QualifiedName,
			"kind":           "enum",
			"doc_url":        enum.DocURL,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// exceptionToDocument builds the Document for an exception: the qualified
// name tagged "(exception)" as a heading, followed by the doc text. The
// metadata kind is fixed to "exception".
func (s *JavaDocsScraper) exceptionToDocument(exc *javadocs.Class, pkg *javadocs.Package, sourceName string) *Document {
	body := fmt.Sprintf("# %s (exception)\n\n%s\n", exc.QualifiedName, exc.Doc)

	return &Document{
		ID:      generateDocID(exc.DocURL),
		Source:  sourceName,
		Type:    "java-exception",
		Title:   exc.QualifiedName,
		Content: body,
		URL:     exc.DocURL,
		Metadata: map[string]interface{}{
			"package":        pkg.Name,
			"qualified_name": exc.QualifiedName,
			"kind":           "exception",
			"doc_url":        exc.DocURL,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
+222
View File
@@ -0,0 +1,222 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/mcpdocs"
)
// MCPDocsScraper fetches an MCP server documentation page over HTTP and
// converts the parsed server description into Document values.
type MCPDocsScraper struct {
// config supplies the HTTP timeout and the User-Agent sent with requests.
config *Config
// parser turns the fetched HTML into an mcpdocs server structure.
parser *mcpdocs.Parser
// client is the HTTP client used for every page fetch.
client *http.Client
}
// NewMCPDocsScraper builds an MCPDocsScraper from config.
//
// The scraper gets a fresh mcpdocs parser and its own HTTP client whose
// timeout is config.Timeout. config is dereferenced here, so it must be
// non-nil.
func NewMCPDocsScraper(config *Config) *MCPDocsScraper {
	scraper := &MCPDocsScraper{config: config}
	scraper.parser = mcpdocs.NewParser()
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads the server page at source.URL, parses it, and returns a
// flat list of Documents.
//
// The order is: the server Document, then its tools, resources and prompts.
// It returns an error when source.URL is empty, when the page cannot be
// fetched, or when the parser rejects the HTML.
func (s *MCPDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for MCP docs scraper")
	}

	rawHTML, fetchErr := s.fetchPage(ctx, source.URL)
	if fetchErr != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", fetchErr)
	}

	parsed, parseErr := s.parser.ParseServerPage(rawHTML, source.URL)
	if parseErr != nil {
		return nil, fmt.Errorf("failed to parse MCP server page: %w", parseErr)
	}

	docs := []*Document{s.serverToDocument(parsed, source.Name)}

	for _, tool := range parsed.Tools {
		docs = append(docs, s.toolToDocument(tool, parsed, source.Name))
	}
	for _, resource := range parsed.Resources {
		docs = append(docs, s.resourceToDocument(resource, parsed, source.Name))
	}
	for _, prompt := range parsed.Prompts {
		docs = append(docs, s.promptToDocument(prompt, parsed, source.Name))
	}

	return docs, nil
}
// DetectChanges reports whether the page at source.URL differs from the
// content that produced lastHash.
//
// It fetches the page and hashes the raw response body with generateHash. It
// returns whether that hash differs from lastHash, together with the new hash
// so the caller can keep it for the next comparison. On failure it returns
// (false, "", err).
//
// The hash covers the raw HTML rather than the parsed documents, so any
// byte-level change in the response is reported as a change.
func (s *MCPDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	// Same validation as Scrape, so an unset URL produces a clear error
	// instead of an opaque request failure.
	if source.URL == "" {
		return false, "", fmt.Errorf("URL is required for MCP docs scraper")
	}

	html, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		// Wrapped with the same prefix Scrape uses; %w keeps the cause inspectable.
		return false, "", fmt.Errorf("failed to fetch page: %w", err)
	}

	hash := s.generateHash(html)
	return hash != lastHash, hash, nil
}
// fetchPage performs a GET request for url and returns the response body as a
// string.
//
// The request is bound to ctx and carries the configured User-Agent when one
// is set. Any status other than 200 OK is reported as an error that names the
// URL; the body is not returned in that case.
func (s *MCPDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	// An explicitly empty User-Agent makes net/http omit the header entirely,
	// so only override the library default when a value is configured.
	if s.config.UserAgent != "" {
		req.Header.Set("User-Agent", s.config.UserAgent)
	}

	resp, err := s.client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Keeps the original "HTTP <code>" prefix and adds the URL so the
		// failing page can be identified from the error alone.
		return "", fmt.Errorf("HTTP %d fetching %s", resp.StatusCode, url)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading response body from %s: %w", url, err)
	}
	return string(body), nil
}
// generateHash returns the SHA-256 digest of content as a lowercase hex string.
func (s *MCPDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// serverToDocument builds the server-level Document.
//
// The content is the server name as a heading, its description, and, when
// the server exposes tools, a "Tools (<count>)" list giving each tool's name
// and description.
func (s *MCPDocsScraper) serverToDocument(server *mcpdocs.Server, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# %s\n\n%s\n", server.Name, server.Description)

	if count := len(server.Tools); count > 0 {
		fmt.Fprintf(&md, "\n## Tools (%d)\n", count)
		for _, tool := range server.Tools {
			fmt.Fprintf(&md, "- `%s`: %s\n", tool.Name, tool.Description)
		}
	}

	body := md.String()
	return &Document{
		ID:      generateDocID(server.DocURL),
		Source:  sourceName,
		Type:    "mcp-server",
		Title:   server.Name,
		Content: body,
		URL:     server.DocURL,
		Metadata: map[string]interface{}{
			"server":   server.Name,
			"category": server.Category,
			"doc_url":  server.DocURL,
			"doc_type": "mcp-server",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// toolToDocument builds the Document for one tool of a server.
//
// The content is the tool name as a heading, a "Server:" line, and the tool
// description. The title is "<server>.<tool>". The ID is derived from the
// tool's doc URL plus "#" and the tool name, while the Document URL is the
// doc URL alone.
func (s *MCPDocsScraper) toolToDocument(tool *mcpdocs.Tool, server *mcpdocs.Server, sourceName string) *Document {
	body := fmt.Sprintf("# %s\n\nServer: %s\n\n%s\n", tool.Name, server.Name, tool.Description)

	return &Document{
		ID:      generateDocID(tool.DocURL + "#" + tool.Name),
		Source:  sourceName,
		Type:    "mcp-tool",
		Title:   fmt.Sprintf("%s.%s", server.Name, tool.Name),
		Content: body,
		URL:     tool.DocURL,
		Metadata: map[string]interface{}{
			"server":   server.Name,
			"tool":     tool.Name,
			"doc_url":  tool.DocURL,
			"doc_type": "mcp-tool",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// resourceToDocument builds the Document for one resource of a server.
//
// The content is the resource name as a heading, "Server:" and "URI:" lines,
// and the resource description. The ID is derived from the resource's doc
// URL plus "#" and the resource name, while the Document URL is the doc URL
// alone.
func (s *MCPDocsScraper) resourceToDocument(res *mcpdocs.Resource, server *mcpdocs.Server, sourceName string) *Document {
	body := fmt.Sprintf("# %s\n\nServer: %s\nURI: %s\n\n%s\n", res.Name, server.Name, res.URI, res.Description)

	return &Document{
		ID:      generateDocID(res.DocURL + "#" + res.Name),
		Source:  sourceName,
		Type:    "mcp-resource",
		Title:   res.Name,
		Content: body,
		URL:     res.DocURL,
		Metadata: map[string]interface{}{
			"server":   server.Name,
			"resource": res.Name,
			"uri":      res.URI,
			"doc_url":  res.DocURL,
			"doc_type": "mcp-resource",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// promptToDocument renders a single MCP prompt as a Markdown document
// containing the prompt name, the owning server's name and the prompt
// description.
//
// The Document ID is derived from prompt.DocURL joined with the prompt name by "#".
func (s *MCPDocsScraper) promptToDocument(prompt *mcpdocs.Prompt, server *mcpdocs.Server, sourceName string) *Document {
	const docType = "mcp-prompt"

	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\n", prompt.Name)
	fmt.Fprintf(&sb, "Server: %s\n\n", server.Name)
	fmt.Fprintf(&sb, "%s\n", prompt.Description)
	body := sb.String()

	info := map[string]interface{}{
		"server":   server.Name,
		"prompt":   prompt.Name,
		"doc_url":  prompt.DocURL,
		"doc_type": docType,
	}

	return &Document{
		ID:        generateDocID(prompt.DocURL + "#" + prompt.Name),
		Source:    sourceName,
		Type:      docType,
		Title:     prompt.Name,
		Content:   body,
		URL:       prompt.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
+298
View File
@@ -0,0 +1,298 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/nuxtdocs"
)
// NuxtDocsScraper fetches a Nuxt documentation reference page over HTTP and
// converts the parsed reference into Documents.
type NuxtDocsScraper struct {
// config supplies the HTTP client timeout and the User-Agent request header.
config *Config
// parser converts fetched page HTML into a nuxtdocs reference model.
parser *nuxtdocs.Parser
// client performs the HTTP requests issued by fetchPage.
client *http.Client
}
// NewNuxtDocsScraper builds a NuxtDocsScraper that keeps config, uses a fresh
// nuxtdocs parser, and owns an HTTP client whose timeout is config.Timeout.
// config must be non-nil.
func NewNuxtDocsScraper(config *Config) *NuxtDocsScraper {
	scraper := new(NuxtDocsScraper)
	scraper.config = config
	scraper.parser = nuxtdocs.NewParser()
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape fetches source.URL, parses it as a Nuxt reference page and returns
// one summary Document followed by a Document for every component,
// composable, utility, config entry and command, in that order.
//
// It returns an error when source.URL is empty or when fetching or parsing
// the page fails.
func (s *NuxtDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Nuxt docs scraper")
	}

	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	reference, err := s.parser.ParseReferencePage(page, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse reference: %w", err)
	}

	// The summary document always comes first.
	docs := []*Document{s.referenceToDocument(reference, source.Name)}
	for _, component := range reference.Components {
		docs = append(docs, s.componentToDocument(component, source.Name))
	}
	for _, composable := range reference.Composables {
		docs = append(docs, s.composableToDocument(composable, source.Name))
	}
	for _, utility := range reference.Utilities {
		docs = append(docs, s.utilityToDocument(utility, source.Name))
	}
	for _, option := range reference.Configs {
		docs = append(docs, s.configToDocument(option, source.Name))
	}
	for _, command := range reference.Commands {
		docs = append(docs, s.commandToDocument(command, source.Name))
	}
	return docs, nil
}
// DetectChanges fetches source.URL and hashes the raw page body. It reports
// whether that hash differs from lastHash and returns the new hash. A fetch
// failure is returned as-is with changed=false and an empty hash.
func (s *NuxtDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", err
	}
	current := s.generateHash(page)
	return current != lastHash, current, nil
}
// fetchPage issues an HTTP GET for url, sending the configured User-Agent,
// and returns the response body as a string.
//
// Any status other than 200 OK is reported as an error. The status error and
// the body-read error include the URL, so callers that return them unwrapped
// (DetectChanges does) still identify which page failed. Errors from building
// or sending the request are returned unchanged.
func (s *NuxtDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("User-Agent", s.config.UserAgent)

	resp, err := s.client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("HTTP %d fetching %s", resp.StatusCode, url)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading response body from %s: %w", url, err)
	}
	return string(body), nil
}
// generateHash returns the hex-encoded SHA-256 digest of content.
func (s *NuxtDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash.Write is documented to never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// referenceToDocument builds the summary Document for a parsed Nuxt
// reference: a fixed title followed by one line giving the number of
// components, composables, utilities, configs and commands.
func (s *NuxtDocsScraper) referenceToDocument(ref *nuxtdocs.Reference, sourceName string) *Document {
	const docType = "nuxt-reference"

	var sb strings.Builder
	sb.WriteString("# Nuxt API Reference\n\n")
	fmt.Fprintf(
		&sb,
		"Components: %d, Composables: %d, Utilities: %d, Configs: %d, Commands: %d\n",
		len(ref.Components),
		len(ref.Composables),
		len(ref.Utilities),
		len(ref.Configs),
		len(ref.Commands),
	)
	body := sb.String()

	return &Document{
		ID:        generateDocID(ref.DocURL),
		Source:    sourceName,
		Type:      docType,
		Title:     "Nuxt API Reference",
		Content:   body,
		URL:       ref.DocURL,
		Metadata:  map[string]interface{}{"doc_type": docType},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// componentToDocument renders a Nuxt component as a Markdown document: a
// "<Name />" heading, the component documentation and, when the component
// declares props, a "Props" list of "name: type" entries.
func (s *NuxtDocsScraper) componentToDocument(comp *nuxtdocs.Component, sourceName string) *Document {
	const docType = "nuxt-component"

	var sb strings.Builder
	fmt.Fprintf(&sb, "# <%s />\n\n", comp.Name)
	fmt.Fprintf(&sb, "%s\n", comp.Doc)
	if len(comp.Props) > 0 {
		sb.WriteString("\n## Props\n")
		for _, prop := range comp.Props {
			fmt.Fprintf(&sb, "- `%s: %s`\n", prop.Name, prop.Type)
		}
	}
	body := sb.String()

	info := map[string]interface{}{
		"name":     comp.Name,
		"category": comp.Category,
		"doc_url":  comp.DocURL,
		"doc_type": docType,
	}

	return &Document{
		ID:        generateDocID(comp.DocURL),
		Source:    sourceName,
		Type:      docType,
		Title:     comp.Name,
		Content:   body,
		URL:       comp.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// composableToDocument renders a Nuxt composable as a Markdown document: a
// name heading, an optional fenced signature, the documentation text and an
// optional "Returns" line. The signature and return sections are emitted only
// when the corresponding field is non-empty.
func (s *NuxtDocsScraper) composableToDocument(comp *nuxtdocs.Composable, sourceName string) *Document {
	const docType = "nuxt-composable"

	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\n", comp.Name)
	if comp.Signature != "" {
		fmt.Fprintf(&sb, "```javascript\n%s\n```\n\n", comp.Signature)
	}
	fmt.Fprintf(&sb, "%s\n", comp.Doc)
	if comp.Returns != "" {
		fmt.Fprintf(&sb, "\n**Returns:** `%s`\n", comp.Returns)
	}
	body := sb.String()

	info := map[string]interface{}{
		"name":     comp.Name,
		"category": comp.Category,
		"doc_url":  comp.DocURL,
		"doc_type": docType,
	}

	return &Document{
		ID:        generateDocID(comp.DocURL),
		Source:    sourceName,
		Type:      docType,
		Title:     comp.Name,
		Content:   body,
		URL:       comp.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// utilityToDocument renders a Nuxt utility as a Markdown document: a name
// heading, a fenced signature when one is present, and the documentation text.
func (s *NuxtDocsScraper) utilityToDocument(util *nuxtdocs.Utility, sourceName string) *Document {
	const docType = "nuxt-utility"

	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\n", util.Name)
	if util.Signature != "" {
		fmt.Fprintf(&sb, "```javascript\n%s\n```\n\n", util.Signature)
	}
	fmt.Fprintf(&sb, "%s\n", util.Doc)
	body := sb.String()

	info := map[string]interface{}{
		"name":     util.Name,
		"doc_url":  util.DocURL,
		"doc_type": docType,
	}

	return &Document{
		ID:        generateDocID(util.DocURL),
		Source:    sourceName,
		Type:      docType,
		Title:     util.Name,
		Content:   body,
		URL:       util.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// configToDocument renders a Nuxt configuration option as a Markdown
// document: a name heading, optional "Type" and "Default" lines (each emitted
// only when the field is non-empty), and the documentation text.
func (s *NuxtDocsScraper) configToDocument(cfg *nuxtdocs.Config, sourceName string) *Document {
	const docType = "nuxt-config"

	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\n", cfg.Name)
	if cfg.Type != "" {
		fmt.Fprintf(&sb, "Type: `%s`\n\n", cfg.Type)
	}
	if cfg.Default != "" {
		fmt.Fprintf(&sb, "Default: `%s`\n\n", cfg.Default)
	}
	fmt.Fprintf(&sb, "%s\n", cfg.Doc)
	body := sb.String()

	info := map[string]interface{}{
		"name":     cfg.Name,
		"type":     cfg.Type,
		"default":  cfg.Default,
		"category": cfg.Category,
		"doc_url":  cfg.DocURL,
		"doc_type": docType,
	}

	return &Document{
		ID:        generateDocID(cfg.DocURL),
		Source:    sourceName,
		Type:      docType,
		Title:     cfg.Name,
		Content:   body,
		URL:       cfg.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// commandToDocument renders a Nuxt CLI command as a Markdown document: a name
// heading, a fenced usage block when usage is present, the documentation
// text and, when the command has flags, a "Flags" list of "--name: doc" entries.
func (s *NuxtDocsScraper) commandToDocument(cmd *nuxtdocs.Command, sourceName string) *Document {
	const docType = "nuxt-command"

	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\n", cmd.Name)
	if cmd.Usage != "" {
		fmt.Fprintf(&sb, "```\n%s\n```\n\n", cmd.Usage)
	}
	fmt.Fprintf(&sb, "%s\n", cmd.Doc)
	if len(cmd.Flags) > 0 {
		sb.WriteString("\n## Flags\n")
		for _, flag := range cmd.Flags {
			fmt.Fprintf(&sb, "- `--%s`: %s\n", flag.Name, flag.Doc)
		}
	}
	body := sb.String()

	info := map[string]interface{}{
		"name":     cmd.Name,
		"usage":    cmd.Usage,
		"doc_url":  cmd.DocURL,
		"doc_type": docType,
	}

	return &Document{
		ID:        generateDocID(cmd.DocURL),
		Source:    sourceName,
		Type:      docType,
		Title:     cmd.Name,
		Content:   body,
		URL:       cmd.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
+463
View File
@@ -0,0 +1,463 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/pythondocs"
)
// PythonDocsScraper fetches a Python documentation module page over HTTP and
// converts the parsed module into Documents.
type PythonDocsScraper struct {
// config supplies the HTTP client timeout and the User-Agent request header.
config *Config
// parser converts fetched page HTML into a pythondocs module model.
parser *pythondocs.Parser
// client performs the HTTP requests issued by fetchPage.
client *http.Client
}
// NewPythonDocsScraper builds a PythonDocsScraper that keeps config, uses a
// fresh pythondocs parser, and owns an HTTP client whose timeout is
// config.Timeout. config must be non-nil.
func NewPythonDocsScraper(config *Config) *PythonDocsScraper {
	scraper := new(PythonDocsScraper)
	scraper.config = config
	scraper.parser = pythondocs.NewParser()
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape fetches source.URL, parses it as a Python module page and returns
// the module Document followed by Documents for its contents.
//
// Each class is emitted followed immediately by its methods, class methods
// and attributes; after all classes come the module's functions, exceptions
// and constants, in that order.
//
// It returns an error when source.URL is empty or when fetching or parsing
// the page fails.
func (s *PythonDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Python docs scraper")
	}

	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	mod, err := s.parser.ParseModulePage(page, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse module: %w", err)
	}

	// The module document always comes first.
	docs := []*Document{s.moduleToDocument(mod, source.Name)}

	for _, cls := range mod.Classes {
		docs = append(docs, s.classToDocument(cls, mod, source.Name))
		for _, method := range cls.Methods {
			docs = append(docs, s.methodToDocument(method, cls, mod, source.Name))
		}
		for _, classMethod := range cls.ClassMethods {
			docs = append(docs, s.classMethodToDocument(classMethod, cls, mod, source.Name))
		}
		for _, attribute := range cls.Attributes {
			docs = append(docs, s.attributeToDocument(attribute, cls, mod, source.Name))
		}
	}
	for _, function := range mod.Functions {
		docs = append(docs, s.functionToDocument(function, mod, source.Name))
	}
	for _, exception := range mod.Exceptions {
		docs = append(docs, s.exceptionToDocument(exception, mod, source.Name))
	}
	for _, constant := range mod.Constants {
		docs = append(docs, s.dataToDocument(constant, mod, source.Name))
	}
	return docs, nil
}
// DetectChanges fetches source.URL and hashes the raw page body. It reports
// whether that hash differs from lastHash and returns the new hash. A fetch
// failure is returned as-is with changed=false and an empty hash.
func (s *PythonDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", err
	}
	current := s.generateHash(page)
	return current != lastHash, current, nil
}
// fetchPage issues an HTTP GET for url, sending the configured User-Agent,
// and returns the response body as a string.
//
// Any status other than 200 OK is reported as an error. The status error and
// the body-read error include the URL, so callers that return them unwrapped
// (DetectChanges does) still identify which page failed. Errors from building
// or sending the request are returned unchanged.
func (s *PythonDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("User-Agent", s.config.UserAgent)

	resp, err := s.client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("HTTP %d fetching %s", resp.StatusCode, url)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading response body from %s: %w", url, err)
	}
	return string(body), nil
}
// generateHash returns the hex-encoded SHA-256 digest of content.
func (s *PythonDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash.Write is documented to never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// moduleToDocument builds the top-level Document for a Python module. The
// content comes from buildModuleContent; the metadata records the module's
// name, path, version, doc URL and the number of classes, functions,
// exceptions and constants it contains.
func (s *PythonDocsScraper) moduleToDocument(module *pythondocs.Module, sourceName string) *Document {
	body := s.buildModuleContent(module)

	info := map[string]interface{}{
		"name":            module.Name,
		"path":            module.Path,
		"version":         module.Version,
		"doc_url":         module.DocURL,
		"class_count":     len(module.Classes),
		"function_count":  len(module.Functions),
		"exception_count": len(module.Exceptions),
		"data_count":      len(module.Constants),
	}

	doc := &Document{
		ID:        generateDocID(module.DocURL),
		Source:    sourceName,
		Type:      "python-module",
		Title:     fmt.Sprintf("%s - Python", module.Name),
		Content:   body,
		URL:       module.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// buildModuleContent renders the Markdown overview of a Python module: a
// heading, the optional synopsis and documentation, then one bulleted section
// each for classes, functions and exceptions (a section is omitted when the
// module has none). Segments are joined with a single newline.
//
// NOTE(review): module.Constants are not listed here even though Scrape emits
// a document per constant — confirm this omission is intentional.
func (s *PythonDocsScraper) buildModuleContent(module *pythondocs.Module) string {
	lines := []string{fmt.Sprintf("# Module %s\n", module.Name)}

	if synopsis := module.Synopsis; synopsis != "" {
		lines = append(lines, synopsis)
	}
	if doc := module.Doc; doc != "" {
		lines = append(lines, "\n"+doc)
	}

	if count := len(module.Classes); count > 0 {
		lines = append(lines, fmt.Sprintf("\n## Classes (%d)\n", count))
		for _, cls := range module.Classes {
			lines = append(lines, fmt.Sprintf("- `%s`", cls.Name))
		}
	}
	if count := len(module.Functions); count > 0 {
		lines = append(lines, fmt.Sprintf("\n## Functions (%d)\n", count))
		for _, function := range module.Functions {
			lines = append(lines, fmt.Sprintf("- `%s`", function.Name))
		}
	}
	if count := len(module.Exceptions); count > 0 {
		lines = append(lines, fmt.Sprintf("\n## Exceptions (%d)\n", count))
		for _, exception := range module.Exceptions {
			lines = append(lines, fmt.Sprintf("- `%s`", exception.Name))
		}
	}

	return strings.Join(lines, "\n")
}
// classToDocument builds the Document for a Python class. The content comes
// from buildClassContent; the metadata records the module and class names,
// the qualified name, the base classes and the method and attribute counts.
// The title is "<module>.<class> - Python".
func (s *PythonDocsScraper) classToDocument(class *pythondocs.Class, module *pythondocs.Module, sourceName string) *Document {
	body := s.buildClassContent(class, module)

	info := map[string]interface{}{
		"module":          module.Name,
		"class":           class.Name,
		"qual_name":       class.QualName,
		"bases":           class.Bases,
		"method_count":    len(class.Methods),
		"attribute_count": len(class.Attributes),
	}

	doc := &Document{
		ID:        generateDocID(class.DocURL),
		Source:    sourceName,
		Type:      "python-class",
		Title:     fmt.Sprintf("%s.%s - Python", module.Name, class.Name),
		Content:   body,
		URL:       class.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// buildClassContent renders the Markdown overview of a Python class: a
// heading, the optional fenced signature, documentation and base-class line,
// then bulleted sections for methods, class methods and attributes (each
// omitted when empty). Segments are joined with a single newline.
func (s *PythonDocsScraper) buildClassContent(class *pythondocs.Class, module *pythondocs.Module) string {
	lines := []string{fmt.Sprintf("# class %s.%s\n", module.Name, class.Name)}

	if class.Signature != "" {
		lines = append(lines, fmt.Sprintf("```python\n%s\n```", class.Signature))
	}
	if doc := class.Doc; doc != "" {
		lines = append(lines, "\n"+doc)
	}
	if len(class.Bases) > 0 {
		lines = append(lines, fmt.Sprintf("\n**Bases:** %s\n", strings.Join(class.Bases, ", ")))
	}

	if count := len(class.Methods); count > 0 {
		lines = append(lines, fmt.Sprintf("\n### Methods (%d)\n", count))
		for _, method := range class.Methods {
			lines = append(lines, fmt.Sprintf("- `%s`", method.Name))
		}
	}
	if count := len(class.ClassMethods); count > 0 {
		lines = append(lines, fmt.Sprintf("\n### Class Methods (%d)\n", count))
		for _, method := range class.ClassMethods {
			lines = append(lines, fmt.Sprintf("- `%s` (classmethod)", method.Name))
		}
	}
	if count := len(class.Attributes); count > 0 {
		lines = append(lines, fmt.Sprintf("\n### Attributes (%d)\n", count))
		for _, attribute := range class.Attributes {
			lines = append(lines, fmt.Sprintf("- `%s`", attribute.Name))
		}
	}

	return strings.Join(lines, "\n")
}
// methodToDocument renders an instance method as a Markdown document: a
// "<module>.<class>.<method>" heading, then the fenced signature and the
// documentation text, each only when non-empty. The metadata records the
// qualified name and the IsStatic and IsAsync flags.
func (s *PythonDocsScraper) methodToDocument(method *pythondocs.Method, class *pythondocs.Class, module *pythondocs.Module, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s.%s.%s\n\n", module.Name, class.Name, method.Name)
	if method.Signature != "" {
		fmt.Fprintf(&sb, "```python\n%s\n```\n", method.Signature)
	}
	if method.Doc != "" {
		fmt.Fprintf(&sb, "%s\n", method.Doc)
	}
	body := sb.String()

	info := map[string]interface{}{
		"module":    module.Name,
		"class":     class.Name,
		"method":    method.Name,
		"qual_name": method.QualName,
		"is_static": method.IsStatic,
		"is_async":  method.IsAsync,
	}

	return &Document{
		ID:        generateDocID(method.DocURL),
		Source:    sourceName,
		Type:      "python-method",
		Title:     fmt.Sprintf("%s.%s.%s - Python", module.Name, class.Name, method.Name),
		Content:   body,
		URL:       method.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// classMethodToDocument builds the Document for a class method. The content
// comes from buildMethodContent; the metadata always sets "is_classmethod" to
// true and the title carries a "(classmethod)" marker.
func (s *PythonDocsScraper) classMethodToDocument(method *pythondocs.Method, class *pythondocs.Class, module *pythondocs.Module, sourceName string) *Document {
	body := s.buildMethodContent(method, class, module)

	info := map[string]interface{}{
		"module":         module.Name,
		"class":          class.Name,
		"method":         method.Name,
		"qual_name":      method.QualName,
		"is_classmethod": true,
	}

	doc := &Document{
		ID:        generateDocID(method.DocURL),
		Source:    sourceName,
		Type:      "python-classmethod",
		Title:     fmt.Sprintf("%s.%s.%s (classmethod) - Python", module.Name, class.Name, method.Name),
		Content:   body,
		URL:       method.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// buildMethodContent renders a method as Markdown: a
// "<module>.<class>.<method>" heading followed by the fenced signature and
// the documentation text, each only when non-empty. Segments are joined with
// a single newline.
func (s *PythonDocsScraper) buildMethodContent(method *pythondocs.Method, class *pythondocs.Class, module *pythondocs.Module) string {
	lines := []string{fmt.Sprintf("# %s.%s.%s\n", module.Name, class.Name, method.Name)}
	if method.Signature != "" {
		lines = append(lines, fmt.Sprintf("```python\n%s\n```", method.Signature))
	}
	if doc := method.Doc; doc != "" {
		lines = append(lines, "\n"+doc)
	}
	return strings.Join(lines, "\n")
}
// attributeToDocument renders a class attribute as a Markdown document: a
// "<module>.<class>.<attr>" heading followed by the documentation text when
// it is non-empty. The metadata records the module, class, attribute name and
// attribute type.
func (s *PythonDocsScraper) attributeToDocument(attr *pythondocs.Attribute, class *pythondocs.Class, module *pythondocs.Module, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s.%s.%s\n\n", module.Name, class.Name, attr.Name)
	if attr.Doc != "" {
		fmt.Fprintf(&sb, "%s\n", attr.Doc)
	}
	body := sb.String()

	info := map[string]interface{}{
		"module": module.Name,
		"class":  class.Name,
		"attr":   attr.Name,
		"type":   attr.Type,
	}

	return &Document{
		ID:        generateDocID(attr.DocURL),
		Source:    sourceName,
		Type:      "python-attribute",
		Title:     fmt.Sprintf("%s.%s.%s - Python", module.Name, class.Name, attr.Name),
		Content:   body,
		URL:       attr.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// functionToDocument builds the Document for a module-level function. The
// content comes from buildFunctionContent; the metadata records the module
// and function names, the qualified name, the signature and the IsAsync and
// IsGenerator flags.
func (s *PythonDocsScraper) functionToDocument(fn *pythondocs.Function, module *pythondocs.Module, sourceName string) *Document {
	body := s.buildFunctionContent(fn, module)

	info := map[string]interface{}{
		"module":       module.Name,
		"function":     fn.Name,
		"qual_name":    fn.QualName,
		"signature":    fn.Signature,
		"is_async":     fn.IsAsync,
		"is_generator": fn.IsGenerator,
	}

	doc := &Document{
		ID:        generateDocID(fn.DocURL),
		Source:    sourceName,
		Type:      "python-function",
		Title:     fmt.Sprintf("%s.%s - Python", module.Name, fn.Name),
		Content:   body,
		URL:       fn.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// buildFunctionContent renders a module-level function as Markdown: a
// "<module>.<function>" heading followed by the fenced signature and the
// documentation text, each only when non-empty. Segments are joined with a
// single newline.
func (s *PythonDocsScraper) buildFunctionContent(fn *pythondocs.Function, module *pythondocs.Module) string {
	lines := []string{fmt.Sprintf("# %s.%s\n", module.Name, fn.Name)}
	if fn.Signature != "" {
		lines = append(lines, fmt.Sprintf("```python\n%s\n```", fn.Signature))
	}
	if doc := fn.Doc; doc != "" {
		lines = append(lines, "\n"+doc)
	}
	return strings.Join(lines, "\n")
}
// exceptionToDocument renders an exception as a Markdown document: a
// "<module>.<exception>" heading, then the fenced signature and the
// documentation text, each only when non-empty. The metadata records the
// qualified name and the base classes.
func (s *PythonDocsScraper) exceptionToDocument(exc *pythondocs.Exception, module *pythondocs.Module, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s.%s\n\n", module.Name, exc.Name)
	if exc.Signature != "" {
		fmt.Fprintf(&sb, "```python\n%s\n```\n", exc.Signature)
	}
	if exc.Doc != "" {
		fmt.Fprintf(&sb, "%s\n", exc.Doc)
	}
	body := sb.String()

	info := map[string]interface{}{
		"module":    module.Name,
		"exception": exc.Name,
		"qual_name": exc.QualName,
		"bases":     exc.Bases,
	}

	return &Document{
		ID:        generateDocID(exc.DocURL),
		Source:    sourceName,
		Type:      "python-exception",
		Title:     fmt.Sprintf("%s.%s - Python", module.Name, exc.Name),
		Content:   body,
		URL:       exc.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// dataToDocument renders a module-level data item as a Markdown document: a
// "<module>.<name>" heading followed by the documentation text when it is
// non-empty. The metadata records the module, the item's name, type and value.
func (s *PythonDocsScraper) dataToDocument(data *pythondocs.Data, module *pythondocs.Module, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s.%s\n\n", module.Name, data.Name)
	if data.Doc != "" {
		fmt.Fprintf(&sb, "%s\n", data.Doc)
	}
	body := sb.String()

	info := map[string]interface{}{
		"module": module.Name,
		"data":   data.Name,
		"type":   data.Type,
		"value":  data.Value,
	}

	return &Document{
		ID:        generateDocID(data.DocURL),
		Source:    sourceName,
		Type:      "python-data",
		Title:     fmt.Sprintf("%s.%s - Python", module.Name, data.Name),
		Content:   body,
		URL:       data.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
+214
View File
@@ -0,0 +1,214 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/reactdocs"
)
// ReactDocsScraper fetches a React documentation reference page over HTTP and
// converts the parsed reference into Documents.
type ReactDocsScraper struct {
// config supplies the HTTP client timeout and the User-Agent request header.
config *Config
// parser converts fetched page HTML into a reactdocs reference model.
parser *reactdocs.Parser
// client performs the HTTP requests issued by fetchPage.
client *http.Client
}
// NewReactDocsScraper builds a ReactDocsScraper that keeps config, uses a
// fresh reactdocs parser, and owns an HTTP client whose timeout is
// config.Timeout. config must be non-nil.
func NewReactDocsScraper(config *Config) *ReactDocsScraper {
	scraper := new(ReactDocsScraper)
	scraper.config = config
	scraper.parser = reactdocs.NewParser()
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape fetches source.URL, parses it as a React reference page and returns
// one summary Document followed by a Document for every hook, component and
// API, in that order.
//
// It returns an error when source.URL is empty or when fetching or parsing
// the page fails.
func (s *ReactDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for React docs scraper")
	}

	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	reference, err := s.parser.ParseReferencePage(page, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse reference: %w", err)
	}

	// The summary document always comes first.
	docs := []*Document{s.referenceToDocument(reference, source.Name)}
	for _, hook := range reference.Hooks {
		docs = append(docs, s.hookToDocument(hook, source.Name))
	}
	for _, component := range reference.Components {
		docs = append(docs, s.componentToDocument(component, source.Name))
	}
	for _, api := range reference.APIs {
		docs = append(docs, s.apiToDocument(api, source.Name))
	}
	return docs, nil
}
// DetectChanges fetches source.URL and hashes the raw page body. It reports
// whether that hash differs from lastHash and returns the new hash. A fetch
// failure is returned as-is with changed=false and an empty hash.
func (s *ReactDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", err
	}
	current := s.generateHash(page)
	return current != lastHash, current, nil
}
// fetchPage issues an HTTP GET for url, sending the configured User-Agent,
// and returns the response body as a string.
//
// Any status other than 200 OK is reported as an error. The status error and
// the body-read error include the URL, so callers that return them unwrapped
// (DetectChanges does) still identify which page failed. Errors from building
// or sending the request are returned unchanged.
func (s *ReactDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("User-Agent", s.config.UserAgent)

	resp, err := s.client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("HTTP %d fetching %s", resp.StatusCode, url)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading response body from %s: %w", url, err)
	}
	return string(body), nil
}
// generateHash returns the hex-encoded SHA-256 digest of content.
func (s *ReactDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash.Write is documented to never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// referenceToDocument builds the summary Document for a parsed React
// reference: a fixed title followed by one line giving the number of hooks,
// components and APIs.
func (s *ReactDocsScraper) referenceToDocument(ref *reactdocs.Reference, sourceName string) *Document {
	const docType = "react-reference"

	var sb strings.Builder
	sb.WriteString("# React API Reference\n\n")
	fmt.Fprintf(
		&sb,
		"Hooks: %d, Components: %d, APIs: %d\n",
		len(ref.Hooks),
		len(ref.Components),
		len(ref.APIs),
	)
	body := sb.String()

	return &Document{
		ID:        generateDocID(ref.DocURL),
		Source:    sourceName,
		Type:      docType,
		Title:     "React API Reference",
		Content:   body,
		URL:       ref.DocURL,
		Metadata:  map[string]interface{}{"doc_type": docType},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// hookToDocument renders a React hook as a Markdown document: a name heading,
// a fenced signature when one is present, and the documentation text.
func (s *ReactDocsScraper) hookToDocument(hook *reactdocs.Hook, sourceName string) *Document {
	const docType = "react-hook"

	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\n", hook.Name)
	if hook.Signature != "" {
		fmt.Fprintf(&sb, "```javascript\n%s\n```\n\n", hook.Signature)
	}
	fmt.Fprintf(&sb, "%s\n", hook.Doc)
	body := sb.String()

	info := map[string]interface{}{
		"name":     hook.Name,
		"category": hook.Category,
		"doc_url":  hook.DocURL,
		"doc_type": docType,
	}

	return &Document{
		ID:        generateDocID(hook.DocURL),
		Source:    sourceName,
		Type:      docType,
		Title:     hook.Name,
		Content:   body,
		URL:       hook.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// componentToDocument renders a React component as a Markdown document: a
// "<Name />" heading, the component documentation and, when the component
// declares props, a "Props" list of "name: type" entries.
func (s *ReactDocsScraper) componentToDocument(comp *reactdocs.Component, sourceName string) *Document {
	const docType = "react-component"

	var sb strings.Builder
	fmt.Fprintf(&sb, "# <%s />\n\n", comp.Name)
	fmt.Fprintf(&sb, "%s\n", comp.Doc)
	if len(comp.Props) > 0 {
		sb.WriteString("\n## Props\n")
		for _, prop := range comp.Props {
			fmt.Fprintf(&sb, "- `%s: %s`\n", prop.Name, prop.Type)
		}
	}
	body := sb.String()

	info := map[string]interface{}{
		"name":     comp.Name,
		"doc_url":  comp.DocURL,
		"doc_type": docType,
	}

	return &Document{
		ID:        generateDocID(comp.DocURL),
		Source:    sourceName,
		Type:      docType,
		Title:     comp.Name,
		Content:   body,
		URL:       comp.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// apiToDocument renders a React API function as a Markdown document: a
// "name()" heading, a fenced signature when one is present, and the
// documentation text.
func (s *ReactDocsScraper) apiToDocument(api *reactdocs.API, sourceName string) *Document {
	const docType = "react-api"

	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s()\n\n", api.Name)
	if api.Signature != "" {
		fmt.Fprintf(&sb, "```javascript\n%s\n```\n\n", api.Signature)
	}
	fmt.Fprintf(&sb, "%s\n", api.Doc)
	body := sb.String()

	info := map[string]interface{}{
		"name":     api.Name,
		"doc_url":  api.DocURL,
		"doc_type": docType,
	}

	return &Document{
		ID:        generateDocID(api.DocURL),
		Source:    sourceName,
		Type:      docType,
		Title:     api.Name,
		Content:   body,
		URL:       api.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
+563
View File
@@ -0,0 +1,563 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/rustdocs"
)
// RustDocsScraper fetches a Rust documentation crate page over HTTP and
// converts the parsed crate into Documents.
type RustDocsScraper struct {
// config supplies the HTTP client timeout and the User-Agent request header.
config *Config
// parser converts fetched page HTML into a rustdocs crate model.
parser *rustdocs.Parser
// client performs the HTTP requests issued by fetchPage.
client *http.Client
}
// NewRustDocsScraper builds a RustDocsScraper that keeps config, uses a fresh
// rustdocs parser, and owns an HTTP client whose timeout is config.Timeout.
// config must be non-nil.
func NewRustDocsScraper(config *Config) *RustDocsScraper {
	scraper := new(RustDocsScraper)
	scraper.config = config
	scraper.parser = rustdocs.NewParser()
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape fetches source.URL, parses it as a Rust crate page and returns the
// crate Document followed by a Document for every module, struct, enum,
// trait, function, macro, constant and static, in that order.
//
// It returns an error when source.URL is empty or when fetching or parsing
// the page fails.
func (s *RustDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Rust docs scraper")
	}

	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	crate, err := s.parser.ParseCratePage(page, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse crate: %w", err)
	}

	// The crate document always comes first.
	docs := []*Document{s.crateToDocument(crate, source.Name)}
	for _, module := range crate.Modules {
		docs = append(docs, s.moduleToDocument(module, crate, source.Name))
	}
	for _, structItem := range crate.Structs {
		docs = append(docs, s.structToDocument(structItem, crate, source.Name))
	}
	for _, enum := range crate.Enums {
		docs = append(docs, s.enumToDocument(enum, crate, source.Name))
	}
	for _, trait := range crate.Traits {
		docs = append(docs, s.traitToDocument(trait, crate, source.Name))
	}
	for _, function := range crate.Functions {
		docs = append(docs, s.funcToDocument(function, crate, source.Name))
	}
	for _, macro := range crate.Macros {
		docs = append(docs, s.macroToDocument(macro, crate, source.Name))
	}
	for _, constant := range crate.Constants {
		docs = append(docs, s.constToDocument(constant, crate, source.Name))
	}
	for _, static := range crate.Statics {
		docs = append(docs, s.staticToDocument(static, crate, source.Name))
	}
	return docs, nil
}
// DetectChanges fetches source.URL and hashes the raw page body. It reports
// whether that hash differs from lastHash and returns the new hash. A fetch
// failure is returned as-is with changed=false and an empty hash.
func (s *RustDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", err
	}
	current := s.generateHash(page)
	return current != lastHash, current, nil
}
// fetchPage issues an HTTP GET for url, sending the configured User-Agent,
// and returns the response body as a string.
//
// Any status other than 200 OK is reported as an error. The status error and
// the body-read error include the URL, so callers that return them unwrapped
// (DetectChanges does) still identify which page failed. Errors from building
// or sending the request are returned unchanged.
func (s *RustDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("User-Agent", s.config.UserAgent)

	resp, err := s.client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("HTTP %d fetching %s", resp.StatusCode, url)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading response body from %s: %w", url, err)
	}
	return string(body), nil
}
// generateHash returns the hex-encoded SHA-256 digest of content.
func (s *RustDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash.Write is documented to never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// crateToDocument builds the top-level Document for a Rust crate. The content
// comes from buildCrateContent; the metadata records the crate's name,
// version, repository, license, doc URL and the number of items of each kind
// it contains.
func (s *RustDocsScraper) crateToDocument(crate *rustdocs.Crate, sourceName string) *Document {
	body := s.buildCrateContent(crate)

	info := map[string]interface{}{
		"name":           crate.Name,
		"version":        crate.Version,
		"repository":     crate.Repository,
		"license":        crate.License,
		"doc_url":        crate.DocURL,
		"module_count":   len(crate.Modules),
		"struct_count":   len(crate.Structs),
		"enum_count":     len(crate.Enums),
		"trait_count":    len(crate.Traits),
		"function_count": len(crate.Functions),
		"macro_count":    len(crate.Macros),
		"constant_count": len(crate.Constants),
		"static_count":   len(crate.Statics),
	}

	doc := &Document{
		ID:        generateDocID(crate.DocURL),
		Source:    sourceName,
		Type:      "rust-crate",
		Title:     fmt.Sprintf("%s - Rust", crate.Name),
		Content:   body,
		URL:       crate.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// buildCrateContent renders the Markdown overview of a Rust crate: a heading,
// the optional version line and description, then one bulleted section each
// for modules, structs, enums, traits, functions and macros (a section is
// omitted when the crate has none). Segments are joined with a single newline.
//
// NOTE(review): crate.Constants and crate.Statics are counted in the crate
// metadata but not listed here — confirm this omission is intentional.
func (s *RustDocsScraper) buildCrateContent(crate *rustdocs.Crate) string {
	lines := []string{fmt.Sprintf("# Crate %s\n", crate.Name)}

	if crate.Version != "" {
		lines = append(lines, fmt.Sprintf("Version: %s\n", crate.Version))
	}
	if description := crate.Description; description != "" {
		lines = append(lines, description)
	}

	if count := len(crate.Modules); count > 0 {
		lines = append(lines, fmt.Sprintf("\n## Modules (%d)\n", count))
		for _, module := range crate.Modules {
			lines = append(lines, fmt.Sprintf("- `%s`", module.Name))
		}
	}
	if count := len(crate.Structs); count > 0 {
		lines = append(lines, fmt.Sprintf("\n## Structs (%d)\n", count))
		for _, structItem := range crate.Structs {
			lines = append(lines, fmt.Sprintf("- `%s`", structItem.Name))
		}
	}
	if count := len(crate.Enums); count > 0 {
		lines = append(lines, fmt.Sprintf("\n## Enums (%d)\n", count))
		for _, enum := range crate.Enums {
			lines = append(lines, fmt.Sprintf("- `%s`", enum.Name))
		}
	}
	if count := len(crate.Traits); count > 0 {
		lines = append(lines, fmt.Sprintf("\n## Traits (%d)\n", count))
		for _, trait := range crate.Traits {
			lines = append(lines, fmt.Sprintf("- `%s`", trait.Name))
		}
	}
	if count := len(crate.Functions); count > 0 {
		lines = append(lines, fmt.Sprintf("\n## Functions (%d)\n", count))
		for _, function := range crate.Functions {
			lines = append(lines, fmt.Sprintf("- `%s`", function.Name))
		}
	}
	if count := len(crate.Macros); count > 0 {
		lines = append(lines, fmt.Sprintf("\n## Macros (%d)\n", count))
		for _, macro := range crate.Macros {
			lines = append(lines, fmt.Sprintf("- `%s`", macro.Name))
		}
	}

	return strings.Join(lines, "\n")
}
// moduleToDocument renders a Rust module as a Markdown document: a
// "Module <crate>::<module>" heading followed by the documentation text when
// it is non-empty. The metadata records the crate and module names, the
// module path, the IsExperimental flag and kind "module".
func (s *RustDocsScraper) moduleToDocument(m *rustdocs.Module, crate *rustdocs.Crate, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# Module %s::%s\n\n", crate.Name, m.Name)
	if m.Doc != "" {
		fmt.Fprintf(&sb, "%s\n", m.Doc)
	}
	body := sb.String()

	info := map[string]interface{}{
		"crate":           crate.Name,
		"module":          m.Name,
		"path":            m.Path,
		"is_experimental": m.IsExperimental,
		"kind":            "module",
	}

	return &Document{
		ID:        generateDocID(m.DocURL),
		Source:    sourceName,
		Type:      "rust-module",
		Title:     fmt.Sprintf("%s::%s - Rust", crate.Name, m.Name),
		Content:   body,
		URL:       m.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// structToDocument builds the Document for a Rust struct. The content comes
// from buildStructContent; the metadata records the crate and struct names,
// the path, the IsExperimental flag, kind "struct", the declaration and the
// struct's fields serialized as a JSON string under "fields".
func (s *RustDocsScraper) structToDocument(st *rustdocs.Struct, crate *rustdocs.Crate, sourceName string) *Document {
	body := s.buildStructContent(st, crate)

	// The marshal error is deliberately ignored: on failure the "fields"
	// entry is stored as an empty string rather than failing the whole
	// document.
	encodedFields, _ := json.Marshal(st.Fields)

	info := map[string]interface{}{
		"crate":           crate.Name,
		"struct":          st.Name,
		"path":            st.Path,
		"is_experimental": st.IsExperimental,
		"kind":            "struct",
		"declaration":     st.Declaration,
		"fields":          string(encodedFields),
	}

	doc := &Document{
		ID:        generateDocID(st.DocURL),
		Source:    sourceName,
		Type:      "rust-struct",
		Title:     fmt.Sprintf("%s::%s - Rust", crate.Name, st.Name),
		Content:   body,
		URL:       st.DocURL,
		Metadata:  info,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// buildStructContent renders a Rust struct as Markdown: a heading, the
// optional fenced declaration and documentation, a "Fields" list of
// "name: type" entries (with " - doc" appended when a field has
// documentation) and a "Methods" list of method names. Empty sections are
// omitted and segments are joined with a single newline.
func (s *RustDocsScraper) buildStructContent(st *rustdocs.Struct, crate *rustdocs.Crate) string {
	lines := []string{fmt.Sprintf("# struct %s::%s\n", crate.Name, st.Name)}

	if st.Declaration != "" {
		lines = append(lines, fmt.Sprintf("```rust\n%s\n```", st.Declaration))
	}
	if doc := st.Doc; doc != "" {
		lines = append(lines, "\n"+doc)
	}

	if len(st.Fields) > 0 {
		lines = append(lines, "\n### Fields\n")
		for _, field := range st.Fields {
			if field.Doc == "" {
				lines = append(lines, fmt.Sprintf("- `%s: %s`", field.Name, field.Type))
				continue
			}
			lines = append(lines, fmt.Sprintf("- `%s: %s` - %s", field.Name, field.Type, field.Doc))
		}
	}
	if count := len(st.Methods); count > 0 {
		lines = append(lines, fmt.Sprintf("\n### Methods (%d)\n", count))
		for _, method := range st.Methods {
			lines = append(lines, fmt.Sprintf("- `%s`", method.Name))
		}
	}

	return strings.Join(lines, "\n")
}
// enumToDocument converts a parsed Rust enum into a Document.
//
// The content is produced by buildEnumContent. The ID comes from
// generateDocID(e.DocURL) and the hash from s.generateHash over the rendered
// content.
func (s *RustDocsScraper) enumToDocument(e *rustdocs.Enum, crate *rustdocs.Crate, sourceName string) *Document {
	body := s.buildEnumContent(e, crate)

	doc := &Document{
		ID:      generateDocID(e.DocURL),
		Source:  sourceName,
		Type:    "rust-enum",
		Title:   fmt.Sprintf("%s::%s - Rust", crate.Name, e.Name),
		Content: body,
		URL:     e.DocURL,
		Metadata: map[string]interface{}{
			"crate":           crate.Name,
			"enum":            e.Name,
			"path":            e.Path,
			"is_experimental": e.IsExperimental,
			"kind":            "enum",
			"declaration":     e.Declaration,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// buildEnumContent renders the Markdown body for a Rust enum.
//
// Sections, each emitted only when its data is present: the heading, the
// declaration in a rust code fence, the doc text and a "Variants" list (with
// the variant doc appended when available). Sections are joined with single
// newlines.
func (s *RustDocsScraper) buildEnumContent(e *rustdocs.Enum, crate *rustdocs.Crate) string {
	lines := []string{fmt.Sprintf("# enum %s::%s\n", crate.Name, e.Name)}

	if e.Declaration != "" {
		lines = append(lines, fmt.Sprintf("```rust\n%s\n```", e.Declaration))
	}
	if e.Doc != "" {
		lines = append(lines, "\n"+e.Doc)
	}

	if len(e.Variants) > 0 {
		lines = append(lines, "\n### Variants\n")
		for _, variant := range e.Variants {
			entry := fmt.Sprintf("- `%s`", variant.Name)
			if variant.Doc != "" {
				entry = fmt.Sprintf("%s - %s", entry, variant.Doc)
			}
			lines = append(lines, entry)
		}
	}

	return strings.Join(lines, "\n")
}
// traitToDocument converts a parsed Rust trait into a Document.
//
// The content is produced by buildTraitContent. The ID comes from
// generateDocID(t.DocURL) and the hash from s.generateHash over the rendered
// content.
func (s *RustDocsScraper) traitToDocument(t *rustdocs.Trait, crate *rustdocs.Crate, sourceName string) *Document {
	body := s.buildTraitContent(t, crate)

	doc := &Document{
		ID:      generateDocID(t.DocURL),
		Source:  sourceName,
		Type:    "rust-trait",
		Title:   fmt.Sprintf("%s::%s - Rust", crate.Name, t.Name),
		Content: body,
		URL:     t.DocURL,
		Metadata: map[string]interface{}{
			"crate":           crate.Name,
			"trait":           t.Name,
			"path":            t.Path,
			"is_experimental": t.IsExperimental,
			"kind":            "trait",
			"declaration":     t.Declaration,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// buildTraitContent renders the Markdown body for a Rust trait.
//
// Sections, each emitted only when its data is present: the heading, the
// declaration in a rust code fence, the doc text and a "Required Methods"
// list of method signatures with a count. Sections are joined with single
// newlines.
func (s *RustDocsScraper) buildTraitContent(t *rustdocs.Trait, crate *rustdocs.Crate) string {
	lines := []string{fmt.Sprintf("# trait %s::%s\n", crate.Name, t.Name)}

	if t.Declaration != "" {
		lines = append(lines, fmt.Sprintf("```rust\n%s\n```", t.Declaration))
	}
	if t.Doc != "" {
		lines = append(lines, "\n"+t.Doc)
	}

	if count := len(t.Methods); count > 0 {
		lines = append(lines, fmt.Sprintf("\n### Required Methods (%d)\n", count))
		for _, method := range t.Methods {
			lines = append(lines, fmt.Sprintf("- `%s`", method.Signature))
		}
	}

	return strings.Join(lines, "\n")
}
// funcToDocument converts a parsed Rust free function into a Document.
//
// The content is a "# fn <crate>::<name>" heading, then the signature in a
// rust code fence and the doc text, each only when non-empty. The ID comes
// from generateDocID(f.DocURL) and the hash from s.generateHash over the
// rendered content.
func (s *RustDocsScraper) funcToDocument(f *rustdocs.Func, crate *rustdocs.Crate, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# fn %s::%s\n\n", crate.Name, f.Name)
	if f.Signature != "" {
		fmt.Fprintf(&sb, "```rust\n%s\n```\n", f.Signature)
	}
	if f.Doc != "" {
		fmt.Fprintf(&sb, "%s\n", f.Doc)
	}
	body := sb.String()

	doc := &Document{
		ID:      generateDocID(f.DocURL),
		Source:  sourceName,
		Type:    "rust-fn",
		Title:   fmt.Sprintf("%s::%s - Rust", crate.Name, f.Name),
		Content: body,
		URL:     f.DocURL,
		Metadata: map[string]interface{}{
			"crate":           crate.Name,
			"function":        f.Name,
			"path":            f.Path,
			"is_experimental": f.IsExperimental,
			"is_unsafe":       f.IsUnsafe,
			"is_const":        f.IsConst,
			"is_async":        f.IsAsync,
			"kind":            "fn",
			"signature":       f.Signature,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// macroToDocument converts a parsed Rust macro into a Document.
//
// The content is a "# macro <crate>::<name>!" heading, then the signature in
// a rust code fence and the doc text, each only when non-empty. The ID comes
// from generateDocID(m.DocURL) and the hash from s.generateHash over the
// rendered content.
func (s *RustDocsScraper) macroToDocument(m *rustdocs.Macro, crate *rustdocs.Crate, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# macro %s::%s!\n\n", crate.Name, m.Name)
	if m.Signature != "" {
		fmt.Fprintf(&sb, "```rust\n%s\n```\n", m.Signature)
	}
	if m.Doc != "" {
		fmt.Fprintf(&sb, "%s\n", m.Doc)
	}
	body := sb.String()

	doc := &Document{
		ID:      generateDocID(m.DocURL),
		Source:  sourceName,
		Type:    "rust-macro",
		Title:   fmt.Sprintf("%s::%s! - Rust", crate.Name, m.Name),
		Content: body,
		URL:     m.DocURL,
		Metadata: map[string]interface{}{
			"crate":           crate.Name,
			"macro":           m.Name,
			"path":            m.Path,
			"is_experimental": m.IsExperimental,
			"kind":            "macro",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// constToDocument converts a parsed Rust constant into a Document.
//
// The content is a "# const <crate>::<name>" heading, then "Type:" and
// "Value:" lines and the doc text, each only when non-empty. The ID comes
// from generateDocID(c.DocURL) and the hash from s.generateHash over the
// rendered content.
func (s *RustDocsScraper) constToDocument(c *rustdocs.Const, crate *rustdocs.Crate, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# const %s::%s\n\n", crate.Name, c.Name)
	if c.Type != "" {
		fmt.Fprintf(&sb, "Type: `%s`\n", c.Type)
	}
	if c.Value != "" {
		fmt.Fprintf(&sb, "Value: `%s`\n", c.Value)
	}
	if c.Doc != "" {
		fmt.Fprintf(&sb, "\n%s\n", c.Doc)
	}
	body := sb.String()

	doc := &Document{
		ID:      generateDocID(c.DocURL),
		Source:  sourceName,
		Type:    "rust-const",
		Title:   fmt.Sprintf("%s::%s - Rust", crate.Name, c.Name),
		Content: body,
		URL:     c.DocURL,
		Metadata: map[string]interface{}{
			"crate":           crate.Name,
			"const":           c.Name,
			"path":            c.Path,
			"is_experimental": c.IsExperimental,
			"type":            c.Type,
			"value":           c.Value,
			"kind":            "const",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// staticToDocument converts a parsed Rust static item into a Document.
//
// The content is a "# static <crate>::<name>" heading, then a "Type:" line
// when the type is non-empty, a "Mutability: mutable" line when IsMutable is
// set, and the doc text when non-empty. The ID comes from
// generateDocID(st.DocURL) and the hash from s.generateHash over the rendered
// content.
func (s *RustDocsScraper) staticToDocument(st *rustdocs.Static, crate *rustdocs.Crate, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# static %s::%s\n\n", crate.Name, st.Name)
	if st.Type != "" {
		fmt.Fprintf(&sb, "Type: `%s`\n", st.Type)
	}
	if st.IsMutable {
		sb.WriteString("Mutability: mutable\n")
	}
	if st.Doc != "" {
		fmt.Fprintf(&sb, "\n%s\n", st.Doc)
	}
	body := sb.String()

	doc := &Document{
		ID:      generateDocID(st.DocURL),
		Source:  sourceName,
		Type:    "rust-static",
		Title:   fmt.Sprintf("%s::%s - Rust", crate.Name, st.Name),
		Content: body,
		URL:     st.DocURL,
		Metadata: map[string]interface{}{
			"crate":           crate.Name,
			"static":          st.Name,
			"path":            st.Path,
			"is_experimental": st.IsExperimental,
			"is_mutable":      st.IsMutable,
			"type":            st.Type,
			"kind":            "static",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
+221
View File
@@ -0,0 +1,221 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/springdocs"
)
// SpringDocsScraper fetches a Spring documentation page over HTTP and turns
// the parsed module, its classes, properties and guides into Documents.
type SpringDocsScraper struct {
// config supplies the request User-Agent and the HTTP client timeout.
config *Config
// parser turns fetched HTML into a springdocs module.
parser *springdocs.Parser
// client performs the HTTP requests issued by fetchPage.
client *http.Client
}
// NewSpringDocsScraper builds a SpringDocsScraper that uses a fresh
// springdocs parser and an HTTP client whose timeout is config.Timeout.
func NewSpringDocsScraper(config *Config) *SpringDocsScraper {
	parser := springdocs.NewParser()
	client := &http.Client{Timeout: config.Timeout}
	return &SpringDocsScraper{config: config, parser: parser, client: client}
}
// Scrape fetches source.URL, parses it as a Spring module page and returns
// one Document for the module followed by one per class, property and guide,
// in that order.
//
// It returns an error when source.URL is empty, when the page cannot be
// fetched, or when the parser rejects the page; the underlying fetch/parse
// error is wrapped.
func (s *SpringDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Spring docs scraper")
	}
	html, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}
	module, err := s.parser.ParseModulePage(html, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse module: %w", err)
	}

	// The final length is known once the page is parsed, so allocate the
	// result once instead of growing it through repeated appends.
	documents := make([]*Document, 0, 1+len(module.Classes)+len(module.Properties)+len(module.Guides))
	documents = append(documents, s.moduleToDocument(module, source.Name))
	for _, class := range module.Classes {
		documents = append(documents, s.classToDocument(class, module, source.Name))
	}
	for _, prop := range module.Properties {
		documents = append(documents, s.propertyToDocument(prop, source.Name))
	}
	for _, guide := range module.Guides {
		documents = append(documents, s.guideToDocument(guide, source.Name))
	}
	return documents, nil
}
// DetectChanges fetches source.URL, hashes the raw page body and reports
// whether that hash differs from lastHash. The freshly computed hash is
// returned alongside the flag. On fetch failure it returns (false, "", err).
func (s *SpringDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", err
	}
	current := s.generateHash(page)
	return current != lastHash, current, nil
}
// fetchPage issues a GET for url with the configured User-Agent and returns
// the response body as a string. Any status other than 200 OK is reported as
// an "HTTP <code>" error; request, transport and read errors are returned
// unchanged.
func (s *SpringDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	request, reqErr := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if reqErr != nil {
		return "", reqErr
	}
	request.Header.Set("User-Agent", s.config.UserAgent)

	response, doErr := s.client.Do(request)
	if doErr != nil {
		return "", doErr
	}
	defer response.Body.Close()

	if status := response.StatusCode; status != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", status)
	}

	raw, readErr := io.ReadAll(response.Body)
	if readErr != nil {
		return "", readErr
	}
	return string(raw), nil
}
// generateHash returns the lowercase hex encoding of the SHA-256 digest of
// content.
func (s *SpringDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash.Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// moduleToDocument converts a parsed Spring module into a Document whose
// content is the module name as a heading followed by its doc text. The ID
// comes from generateDocID(module.DocURL) and the hash from s.generateHash
// over the rendered content.
func (s *SpringDocsScraper) moduleToDocument(module *springdocs.Module, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\n%s\n", module.Name, module.Doc)
	body := sb.String()

	doc := &Document{
		ID:      generateDocID(module.DocURL),
		Source:  sourceName,
		Type:    "spring-module",
		Title:   module.Name,
		Content: body,
		URL:     module.DocURL,
		Metadata: map[string]interface{}{
			"module":   module.Name,
			"version":  module.Version,
			"doc_url":  module.DocURL,
			"doc_type": "spring-module",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// classToDocument converts a parsed Spring class into a Document. The content
// is the qualified class name as a heading, the doc text, and a "Methods"
// list of signatures when the class has any methods. The ID comes from
// generateDocID(class.DocURL) and the hash from s.generateHash over the
// rendered content.
func (s *SpringDocsScraper) classToDocument(class *springdocs.Class, module *springdocs.Module, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\n%s\n", class.QualifiedName, class.Doc)
	if len(class.Methods) > 0 {
		sb.WriteString("\n## Methods\n")
		for _, method := range class.Methods {
			fmt.Fprintf(&sb, "- `%s`\n", method.Signature)
		}
	}
	body := sb.String()

	doc := &Document{
		ID:      generateDocID(class.DocURL),
		Source:  sourceName,
		Type:    "spring-class",
		Title:   class.QualifiedName,
		Content: body,
		URL:     class.DocURL,
		Metadata: map[string]interface{}{
			"module":         module.Name,
			"qualified_name": class.QualifiedName,
			"kind":           class.Kind,
			"doc_url":        class.DocURL,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// propertyToDocument converts a parsed Spring configuration property into a
// Document. The content is the property name as a heading, a "Type:" line, a
// "Default:" line when a default is set, and the doc text.
//
// Unlike the other converters in this scraper, the ID is derived from the
// property name rather than its DocURL.
func (s *SpringDocsScraper) propertyToDocument(prop *springdocs.Property, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\nType: %s\n", prop.Name, prop.Type)
	if prop.Default != "" {
		fmt.Fprintf(&sb, "Default: `%s`\n", prop.Default)
	}
	fmt.Fprintf(&sb, "\n%s\n", prop.Doc)
	body := sb.String()

	doc := &Document{
		ID:      generateDocID(prop.Name),
		Source:  sourceName,
		Type:    "spring-property",
		Title:   prop.Name,
		Content: body,
		URL:     prop.DocURL,
		Metadata: map[string]interface{}{
			"property": prop.Name,
			"type":     prop.Type,
			"default":  prop.Default,
			"doc_url":  prop.DocURL,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// guideToDocument converts a parsed Spring guide into a Document whose
// content is the guide title as a heading followed by its description. The ID
// comes from generateDocID(guide.DocURL) and the hash from s.generateHash
// over the rendered content.
func (s *SpringDocsScraper) guideToDocument(guide *springdocs.Guide, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\n%s\n", guide.Title, guide.Description)
	body := sb.String()

	doc := &Document{
		ID:      generateDocID(guide.DocURL),
		Source:  sourceName,
		Type:    "spring-guide",
		Title:   guide.Title,
		Content: body,
		URL:     guide.DocURL,
		Metadata: map[string]interface{}{
			"title":    guide.Title,
			"doc_url":  guide.DocURL,
			"level":    guide.Level,
			"doc_type": "spring-guide",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
+261
View File
@@ -0,0 +1,261 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/tsdocs"
)
// TSDocsScraper fetches a TypeScript documentation page over HTTP and turns
// the parsed module, its interfaces, functions, classes and type aliases into
// Documents.
type TSDocsScraper struct {
// config supplies the request User-Agent and the HTTP client timeout.
config *Config
// parser turns fetched HTML into a tsdocs module.
parser *tsdocs.Parser
// client performs the HTTP requests issued by fetchPage.
client *http.Client
}
// NewTSDocsScraper builds a TSDocsScraper that uses a fresh tsdocs parser and
// an HTTP client whose timeout is config.Timeout.
func NewTSDocsScraper(config *Config) *TSDocsScraper {
	parser := tsdocs.NewParser()
	client := &http.Client{Timeout: config.Timeout}
	return &TSDocsScraper{config: config, parser: parser, client: client}
}
// Scrape fetches source.URL, parses it as a TypeScript module page and
// returns one Document for the module followed by one per interface,
// function, class and type alias, in that order.
//
// It returns an error when source.URL is empty, when the page cannot be
// fetched, or when the parser rejects the page; the underlying fetch/parse
// error is wrapped.
func (s *TSDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for TypeScript docs scraper")
	}
	html, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}
	module, err := s.parser.ParseModulePage(html, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse module: %w", err)
	}

	// The final length is known once the page is parsed, so allocate the
	// result once instead of growing it through repeated appends.
	total := 1 + len(module.Interfaces) + len(module.Functions) + len(module.Classes) + len(module.Types)
	documents := make([]*Document, 0, total)
	documents = append(documents, s.moduleToDocument(module, source.Name))
	for _, iface := range module.Interfaces {
		documents = append(documents, s.interfaceToDocument(iface, module, source.Name))
	}
	for _, fn := range module.Functions {
		documents = append(documents, s.functionToDocument(fn, module, source.Name))
	}
	for _, class := range module.Classes {
		documents = append(documents, s.classToDocument(class, module, source.Name))
	}
	for _, ta := range module.Types {
		documents = append(documents, s.typeAliasToDocument(ta, module, source.Name))
	}
	return documents, nil
}
// DetectChanges fetches source.URL, hashes the raw page body and reports
// whether that hash differs from lastHash. The freshly computed hash is
// returned alongside the flag. On fetch failure it returns (false, "", err).
func (s *TSDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", err
	}
	current := s.generateHash(page)
	return current != lastHash, current, nil
}
// fetchPage issues a GET for url with the configured User-Agent and returns
// the response body as a string. Any status other than 200 OK is reported as
// an "HTTP <code>" error; request, transport and read errors are returned
// unchanged.
func (s *TSDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	request, reqErr := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if reqErr != nil {
		return "", reqErr
	}
	request.Header.Set("User-Agent", s.config.UserAgent)

	response, doErr := s.client.Do(request)
	if doErr != nil {
		return "", doErr
	}
	defer response.Body.Close()

	if status := response.StatusCode; status != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", status)
	}

	raw, readErr := io.ReadAll(response.Body)
	if readErr != nil {
		return "", readErr
	}
	return string(raw), nil
}
// generateHash returns the lowercase hex encoding of the SHA-256 digest of
// content.
func (s *TSDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash.Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// moduleToDocument converts a parsed TypeScript module into a Document whose
// content is the module name as a heading followed by its doc text. The ID
// comes from generateDocID(module.DocURL) and the hash from s.generateHash
// over the rendered content.
func (s *TSDocsScraper) moduleToDocument(module *tsdocs.Module, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\n%s\n", module.Name, module.Doc)
	body := sb.String()

	doc := &Document{
		ID:      generateDocID(module.DocURL),
		Source:  sourceName,
		Type:    "ts-module",
		Title:   module.Name,
		Content: body,
		URL:     module.DocURL,
		Metadata: map[string]interface{}{
			"module":   module.Name,
			"version":  module.Version,
			"doc_url":  module.DocURL,
			"doc_type": "ts-module",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// interfaceToDocument converts a parsed TypeScript interface into a Document.
// The content is a "<name> (interface)" heading, the doc text when non-empty,
// and a "Properties" list of "name: type" entries when the interface has any.
// The ID comes from generateDocID(iface.DocURL) and the hash from
// s.generateHash over the rendered content.
func (s *TSDocsScraper) interfaceToDocument(iface *tsdocs.Interface, module *tsdocs.Module, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s (interface)\n\n", iface.Name)
	if iface.Doc != "" {
		fmt.Fprintf(&sb, "%s\n\n", iface.Doc)
	}
	if len(iface.Properties) > 0 {
		sb.WriteString("## Properties\n")
		for _, prop := range iface.Properties {
			fmt.Fprintf(&sb, "- `%s: %s`\n", prop.Name, prop.Type)
		}
	}
	body := sb.String()

	doc := &Document{
		ID:      generateDocID(iface.DocURL),
		Source:  sourceName,
		Type:    "ts-interface",
		Title:   iface.Name,
		Content: body,
		URL:     iface.DocURL,
		Metadata: map[string]interface{}{
			"module":  module.Name,
			"name":    iface.Name,
			"doc_url": iface.DocURL,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// functionToDocument converts a parsed TypeScript function into a Document.
// The content is a "<name>()" heading, then the signature in a typescript
// code fence and the doc text, each only when non-empty. The ID comes from
// generateDocID(fn.DocURL) and the hash from s.generateHash over the rendered
// content.
func (s *TSDocsScraper) functionToDocument(fn *tsdocs.Function, module *tsdocs.Module, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s()\n\n", fn.Name)
	if fn.Signature != "" {
		fmt.Fprintf(&sb, "```typescript\n%s\n```\n\n", fn.Signature)
	}
	if fn.Doc != "" {
		fmt.Fprintf(&sb, "%s\n", fn.Doc)
	}
	body := sb.String()

	doc := &Document{
		ID:      generateDocID(fn.DocURL),
		Source:  sourceName,
		Type:    "ts-function",
		Title:   fn.Name,
		Content: body,
		URL:     fn.DocURL,
		Metadata: map[string]interface{}{
			"module":      module.Name,
			"name":        fn.Name,
			"return_type": fn.ReturnType,
			"doc_url":     fn.DocURL,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// classToDocument converts a parsed TypeScript class into a Document. The
// content is a "<name> (class)" heading, the doc text when non-empty, and a
// "Methods" list of method names when the class has any. The ID comes from
// generateDocID(class.DocURL) and the hash from s.generateHash over the
// rendered content.
func (s *TSDocsScraper) classToDocument(class *tsdocs.Class, module *tsdocs.Module, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s (class)\n\n", class.Name)
	if class.Doc != "" {
		fmt.Fprintf(&sb, "%s\n\n", class.Doc)
	}
	if len(class.Methods) > 0 {
		sb.WriteString("## Methods\n")
		for _, method := range class.Methods {
			fmt.Fprintf(&sb, "- `%s()`\n", method.Name)
		}
	}
	body := sb.String()

	doc := &Document{
		ID:      generateDocID(class.DocURL),
		Source:  sourceName,
		Type:    "ts-class",
		Title:   class.Name,
		Content: body,
		URL:     class.DocURL,
		Metadata: map[string]interface{}{
			"module":  module.Name,
			"name":    class.Name,
			"doc_url": class.DocURL,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// typeAliasToDocument converts a parsed TypeScript type alias into a
// Document. The content is a "<name> (type)" heading, the alias rendered as
// "type <name> = <type>" in a typescript code fence, and the doc text when
// non-empty. The ID comes from generateDocID(ta.DocURL) and the hash from
// s.generateHash over the rendered content.
func (s *TSDocsScraper) typeAliasToDocument(ta *tsdocs.TypeAlias, module *tsdocs.Module, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s (type)\n\n```typescript\ntype %s = %s\n```\n\n", ta.Name, ta.Name, ta.Type)
	if ta.Doc != "" {
		fmt.Fprintf(&sb, "%s\n", ta.Doc)
	}
	body := sb.String()

	doc := &Document{
		ID:      generateDocID(ta.DocURL),
		Source:  sourceName,
		Type:    "ts-type",
		Title:   ta.Name,
		Content: body,
		URL:     ta.DocURL,
		Metadata: map[string]interface{}{
			"module":  module.Name,
			"name":    ta.Name,
			"doc_url": ta.DocURL,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
+244
View File
@@ -0,0 +1,244 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/vuedocs"
)
// VueDocsScraper fetches a Vue API reference page over HTTP and turns the
// parsed reference, its composition entries, directives, components and
// global APIs into Documents.
type VueDocsScraper struct {
// config supplies the request User-Agent and the HTTP client timeout.
config *Config
// parser turns fetched HTML into a vuedocs reference.
parser *vuedocs.Parser
// client performs the HTTP requests issued by fetchPage.
client *http.Client
}
// NewVueDocsScraper builds a VueDocsScraper that uses a fresh vuedocs parser
// and an HTTP client whose timeout is config.Timeout.
func NewVueDocsScraper(config *Config) *VueDocsScraper {
	parser := vuedocs.NewParser()
	client := &http.Client{Timeout: config.Timeout}
	return &VueDocsScraper{config: config, parser: parser, client: client}
}
// Scrape fetches source.URL, parses it as a Vue reference page and returns
// one Document for the reference followed by one per composition entry,
// directive, component and global API, in that order.
//
// It returns an error when source.URL is empty, when the page cannot be
// fetched, or when the parser rejects the page; the underlying fetch/parse
// error is wrapped.
func (s *VueDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Vue docs scraper")
	}
	html, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}
	ref, err := s.parser.ParseReferencePage(html, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse reference: %w", err)
	}

	// The final length is known once the page is parsed, so allocate the
	// result once instead of growing it through repeated appends.
	total := 1 + len(ref.Composition) + len(ref.Directives) + len(ref.Components) + len(ref.GlobalAPI)
	documents := make([]*Document, 0, total)
	documents = append(documents, s.referenceToDocument(ref, source.Name))
	for _, comp := range ref.Composition {
		documents = append(documents, s.compositionToDocument(comp, source.Name))
	}
	for _, dir := range ref.Directives {
		documents = append(documents, s.directiveToDocument(dir, source.Name))
	}
	for _, comp := range ref.Components {
		documents = append(documents, s.componentToDocument(comp, source.Name))
	}
	for _, api := range ref.GlobalAPI {
		documents = append(documents, s.globalAPIToDocument(api, source.Name))
	}
	return documents, nil
}
// DetectChanges fetches source.URL, hashes the raw page body and reports
// whether that hash differs from lastHash. The freshly computed hash is
// returned alongside the flag. On fetch failure it returns (false, "", err).
func (s *VueDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", err
	}
	current := s.generateHash(page)
	return current != lastHash, current, nil
}
// fetchPage issues a GET for url with the configured User-Agent and returns
// the response body as a string. Any status other than 200 OK is reported as
// an "HTTP <code>" error; request, transport and read errors are returned
// unchanged.
func (s *VueDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	request, reqErr := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if reqErr != nil {
		return "", reqErr
	}
	request.Header.Set("User-Agent", s.config.UserAgent)

	response, doErr := s.client.Do(request)
	if doErr != nil {
		return "", doErr
	}
	defer response.Body.Close()

	if status := response.StatusCode; status != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", status)
	}

	raw, readErr := io.ReadAll(response.Body)
	if readErr != nil {
		return "", readErr
	}
	return string(raw), nil
}
// generateHash returns the lowercase hex encoding of the SHA-256 digest of
// content.
func (s *VueDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash.Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// referenceToDocument builds the summary Document for a parsed Vue API
// reference page. The content is a fixed heading followed by one line giving
// the number of composition entries, directives, components and global APIs
// found on the page. The ID comes from generateDocID(ref.DocURL) and the hash
// from s.generateHash over the rendered content.
func (s *VueDocsScraper) referenceToDocument(ref *vuedocs.Reference, sourceName string) *Document {
	var content strings.Builder
	content.WriteString("# Vue API Reference\n\n")
	// Scrape emits documents for all four categories, so the summary reports
	// all four; the Global API count was previously missing.
	fmt.Fprintf(&content, "Composition API: %d, Directives: %d, Components: %d, Global API: %d\n",
		len(ref.Composition), len(ref.Directives), len(ref.Components), len(ref.GlobalAPI))
	body := content.String()
	return &Document{
		ID:        generateDocID(ref.DocURL),
		Source:    sourceName,
		Type:      "vue-reference",
		Title:     "Vue API Reference",
		Content:   body,
		URL:       ref.DocURL,
		Metadata:  map[string]interface{}{"doc_type": "vue-reference"},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// compositionToDocument converts a parsed Vue Composition API entry into a
// Document. The content is the entry name as a heading, the signature in a
// javascript code fence when non-empty, and the doc text. The ID comes from
// generateDocID(comp.DocURL) and the hash from s.generateHash over the
// rendered content.
func (s *VueDocsScraper) compositionToDocument(comp *vuedocs.Composition, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\n", comp.Name)
	if comp.Signature != "" {
		fmt.Fprintf(&sb, "```javascript\n%s\n```\n\n", comp.Signature)
	}
	fmt.Fprintf(&sb, "%s\n", comp.Doc)
	body := sb.String()

	doc := &Document{
		ID:      generateDocID(comp.DocURL),
		Source:  sourceName,
		Type:    "vue-composition",
		Title:   comp.Name,
		Content: body,
		URL:     comp.DocURL,
		Metadata: map[string]interface{}{
			"name":     comp.Name,
			"kind":     comp.Kind,
			"doc_url":  comp.DocURL,
			"doc_type": "vue-composition",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// directiveToDocument converts a parsed Vue directive into a Document whose
// content is the directive name as a heading followed by its doc text. The ID
// comes from generateDocID(dir.DocURL) and the hash from s.generateHash over
// the rendered content.
func (s *VueDocsScraper) directiveToDocument(dir *vuedocs.Directive, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\n%s\n", dir.Name, dir.Doc)
	body := sb.String()

	doc := &Document{
		ID:      generateDocID(dir.DocURL),
		Source:  sourceName,
		Type:    "vue-directive",
		Title:   dir.Name,
		Content: body,
		URL:     dir.DocURL,
		Metadata: map[string]interface{}{
			"name":     dir.Name,
			"doc_url":  dir.DocURL,
			"doc_type": "vue-directive",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// componentToDocument converts a parsed Vue built-in component into a
// Document. The content is a "<Name />" heading, the doc text, and a "Props"
// list of "name: type" entries when the component has any. The ID comes from
// generateDocID(comp.DocURL) and the hash from s.generateHash over the
// rendered content.
func (s *VueDocsScraper) componentToDocument(comp *vuedocs.Component, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# <%s />\n\n%s\n", comp.Name, comp.Doc)
	if len(comp.Props) > 0 {
		sb.WriteString("\n## Props\n")
		for _, prop := range comp.Props {
			fmt.Fprintf(&sb, "- `%s: %s`\n", prop.Name, prop.Type)
		}
	}
	body := sb.String()

	doc := &Document{
		ID:      generateDocID(comp.DocURL),
		Source:  sourceName,
		Type:    "vue-component",
		Title:   comp.Name,
		Content: body,
		URL:     comp.DocURL,
		Metadata: map[string]interface{}{
			"name":     comp.Name,
			"doc_url":  comp.DocURL,
			"doc_type": "vue-component",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// globalAPIToDocument converts a parsed Vue global API entry into a Document.
// The content is the API name as a heading, the signature in a javascript
// code fence when non-empty, and the doc text. The ID comes from
// generateDocID(api.DocURL) and the hash from s.generateHash over the
// rendered content.
func (s *VueDocsScraper) globalAPIToDocument(api *vuedocs.API, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\n", api.Name)
	if api.Signature != "" {
		fmt.Fprintf(&sb, "```javascript\n%s\n```\n\n", api.Signature)
	}
	fmt.Fprintf(&sb, "%s\n", api.Doc)
	body := sb.String()

	doc := &Document{
		ID:      generateDocID(api.DocURL),
		Source:  sourceName,
		Type:    "vue-api",
		Title:   api.Name,
		Content: body,
		URL:     api.DocURL,
		Metadata: map[string]interface{}{
			"name":     api.Name,
			"category": api.Category,
			"doc_url":  api.DocURL,
			"doc_type": "vue-api",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}