mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-04 04:23:02 +00:00
update
This commit is contained in:
+45
@@ -0,0 +1,45 @@
|
||||
package scraper
|
||||
|
||||
import basescraper "github.com/yourorg/devour/internal/scraper"
|
||||
|
||||
func init() {
|
||||
basescraper.RegisterScraper(basescraper.SourceTypeGoDocs, func(c *basescraper.Config) basescraper.Scraper {
|
||||
return NewGoDocsScraper(c)
|
||||
})
|
||||
basescraper.RegisterScraper(basescraper.SourceTypeRustDocs, func(c *basescraper.Config) basescraper.Scraper {
|
||||
return NewRustDocsScraper(c)
|
||||
})
|
||||
basescraper.RegisterScraper(basescraper.SourceTypePythonDocs, func(c *basescraper.Config) basescraper.Scraper {
|
||||
return NewPythonDocsScraper(c)
|
||||
})
|
||||
basescraper.RegisterScraper(basescraper.SourceTypeJavaDocs, func(c *basescraper.Config) basescraper.Scraper {
|
||||
return NewJavaDocsScraper(c)
|
||||
})
|
||||
basescraper.RegisterScraper(basescraper.SourceTypeSpringDocs, func(c *basescraper.Config) basescraper.Scraper {
|
||||
return NewSpringDocsScraper(c)
|
||||
})
|
||||
basescraper.RegisterScraper(basescraper.SourceTypeTSDocs, func(c *basescraper.Config) basescraper.Scraper {
|
||||
return NewTSDocsScraper(c)
|
||||
})
|
||||
basescraper.RegisterScraper(basescraper.SourceTypeReactDocs, func(c *basescraper.Config) basescraper.Scraper {
|
||||
return NewReactDocsScraper(c)
|
||||
})
|
||||
basescraper.RegisterScraper(basescraper.SourceTypeVueDocs, func(c *basescraper.Config) basescraper.Scraper {
|
||||
return NewVueDocsScraper(c)
|
||||
})
|
||||
basescraper.RegisterScraper(basescraper.SourceTypeNuxtDocs, func(c *basescraper.Config) basescraper.Scraper {
|
||||
return NewNuxtDocsScraper(c)
|
||||
})
|
||||
basescraper.RegisterScraper(basescraper.SourceTypeMCPDocs, func(c *basescraper.Config) basescraper.Scraper {
|
||||
return NewMCPDocsScraper(c)
|
||||
})
|
||||
basescraper.RegisterScraper(basescraper.SourceTypeDockerDocs, func(c *basescraper.Config) basescraper.Scraper {
|
||||
return NewDockerDocsScraper(c)
|
||||
})
|
||||
basescraper.RegisterScraper(basescraper.SourceTypeCloudflareDocs, func(c *basescraper.Config) basescraper.Scraper {
|
||||
return NewCloudflareDocsScraper(c)
|
||||
})
|
||||
basescraper.RegisterScraper(basescraper.SourceTypeAstroDocs, func(c *basescraper.Config) basescraper.Scraper {
|
||||
return NewAstroDocsScraper(c)
|
||||
})
|
||||
}
|
||||
Vendored
+27
-12
@@ -155,16 +155,18 @@ func (s *TSDocsScraper) interfaceToDocument(iface *tsdocs.Interface, module *tsd
|
||||
metadata := map[string]interface{}{
|
||||
"module": module.Name,
|
||||
"name": iface.Name,
|
||||
"doc_url": iface.DocURL,
|
||||
"doc_url": coalesceDocURL(iface.DocURL, module.DocURL),
|
||||
}
|
||||
|
||||
docURL := coalesceDocURL(iface.DocURL, module.DocURL)
|
||||
|
||||
return &Document{
|
||||
ID: generateDocID(iface.DocURL),
|
||||
ID: generateDocID(docURL),
|
||||
Source: sourceName,
|
||||
Type: "ts-interface",
|
||||
Title: iface.Name,
|
||||
Content: content.String(),
|
||||
URL: iface.DocURL,
|
||||
URL: docURL,
|
||||
Metadata: metadata,
|
||||
Hash: s.generateHash(content.String()),
|
||||
Timestamp: time.Now(),
|
||||
@@ -185,16 +187,18 @@ func (s *TSDocsScraper) functionToDocument(fn *tsdocs.Function, module *tsdocs.M
|
||||
"module": module.Name,
|
||||
"name": fn.Name,
|
||||
"return_type": fn.ReturnType,
|
||||
"doc_url": fn.DocURL,
|
||||
"doc_url": coalesceDocURL(fn.DocURL, module.DocURL),
|
||||
}
|
||||
|
||||
docURL := coalesceDocURL(fn.DocURL, module.DocURL)
|
||||
|
||||
return &Document{
|
||||
ID: generateDocID(fn.DocURL),
|
||||
ID: generateDocID(docURL),
|
||||
Source: sourceName,
|
||||
Type: "ts-function",
|
||||
Title: fn.Name,
|
||||
Content: content.String(),
|
||||
URL: fn.DocURL,
|
||||
URL: docURL,
|
||||
Metadata: metadata,
|
||||
Hash: s.generateHash(content.String()),
|
||||
Timestamp: time.Now(),
|
||||
@@ -217,16 +221,18 @@ func (s *TSDocsScraper) classToDocument(class *tsdocs.Class, module *tsdocs.Modu
|
||||
metadata := map[string]interface{}{
|
||||
"module": module.Name,
|
||||
"name": class.Name,
|
||||
"doc_url": class.DocURL,
|
||||
"doc_url": coalesceDocURL(class.DocURL, module.DocURL),
|
||||
}
|
||||
|
||||
docURL := coalesceDocURL(class.DocURL, module.DocURL)
|
||||
|
||||
return &Document{
|
||||
ID: generateDocID(class.DocURL),
|
||||
ID: generateDocID(docURL),
|
||||
Source: sourceName,
|
||||
Type: "ts-class",
|
||||
Title: class.Name,
|
||||
Content: content.String(),
|
||||
URL: class.DocURL,
|
||||
URL: docURL,
|
||||
Metadata: metadata,
|
||||
Hash: s.generateHash(content.String()),
|
||||
Timestamp: time.Now(),
|
||||
@@ -244,18 +250,27 @@ func (s *TSDocsScraper) typeAliasToDocument(ta *tsdocs.TypeAlias, module *tsdocs
|
||||
metadata := map[string]interface{}{
|
||||
"module": module.Name,
|
||||
"name": ta.Name,
|
||||
"doc_url": ta.DocURL,
|
||||
"doc_url": coalesceDocURL(ta.DocURL, module.DocURL),
|
||||
}
|
||||
|
||||
docURL := coalesceDocURL(ta.DocURL, module.DocURL)
|
||||
|
||||
return &Document{
|
||||
ID: generateDocID(ta.DocURL),
|
||||
ID: generateDocID(docURL),
|
||||
Source: sourceName,
|
||||
Type: "ts-type",
|
||||
Title: ta.Name,
|
||||
Content: content.String(),
|
||||
URL: ta.DocURL,
|
||||
URL: docURL,
|
||||
Metadata: metadata,
|
||||
Hash: s.generateHash(content.String()),
|
||||
Timestamp: time.Now(),
|
||||
}
|
||||
}
|
||||
|
||||
// coalesceDocURL picks the URL to use for a document: fallback when primary is
// empty or consists only of whitespace, otherwise primary. Whichever value is
// chosen is returned exactly as it was passed in, without trimming.
func coalesceDocURL(primary, fallback string) string {
	// Guard clause: a blank primary means there is nothing usable to return.
	if strings.TrimSpace(primary) == "" {
		return fallback
	}
	return primary
}
|
||||
|
||||
+65
@@ -0,0 +1,65 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/yourorg/devour/pkg/tsdocs"
|
||||
)
|
||||
|
||||
func TestTSDocsSubDocsFallbackToModuleURL(t *testing.T) {
|
||||
s := &TSDocsScraper{}
|
||||
module := &tsdocs.Module{
|
||||
Name: "Module",
|
||||
DocURL: "https://www.typescriptlang.org/docs/handbook/2/basic-types.html",
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
build func() *Document
|
||||
docType string
|
||||
}{
|
||||
{
|
||||
name: "interface",
|
||||
build: func() *Document {
|
||||
return s.interfaceToDocument(&tsdocs.Interface{Name: "User", DocURL: ""}, module, "ts")
|
||||
},
|
||||
docType: "ts-interface",
|
||||
},
|
||||
{
|
||||
name: "function",
|
||||
build: func() *Document {
|
||||
return s.functionToDocument(&tsdocs.Function{Name: "parse", DocURL: ""}, module, "ts")
|
||||
},
|
||||
docType: "ts-function",
|
||||
},
|
||||
{
|
||||
name: "class",
|
||||
build: func() *Document {
|
||||
return s.classToDocument(&tsdocs.Class{Name: "Service", DocURL: ""}, module, "ts")
|
||||
},
|
||||
docType: "ts-class",
|
||||
},
|
||||
{
|
||||
name: "type alias",
|
||||
build: func() *Document {
|
||||
return s.typeAliasToDocument(&tsdocs.TypeAlias{Name: "ID", Type: "string", DocURL: ""}, module, "ts")
|
||||
},
|
||||
docType: "ts-type",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
doc := tc.build()
|
||||
if doc.URL != module.DocURL {
|
||||
t.Fatalf("expected fallback URL %q, got %q", module.DocURL, doc.URL)
|
||||
}
|
||||
if got := doc.Metadata["doc_url"]; got != module.DocURL {
|
||||
t.Fatalf("expected metadata doc_url %q, got %#v", module.DocURL, got)
|
||||
}
|
||||
if doc.Type != tc.docType {
|
||||
t.Fatalf("expected doc type %q, got %q", tc.docType, doc.Type)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Vendored
+21
@@ -0,0 +1,21 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
|
||||
basescraper "github.com/yourorg/devour/internal/scraper"
|
||||
)
|
||||
|
||||
type SourceType = basescraper.SourceType
|
||||
|
||||
type Source = basescraper.Source
|
||||
|
||||
type Document = basescraper.Document
|
||||
|
||||
type Config = basescraper.Config
|
||||
|
||||
// generateDocID derives a document identifier from urlStr: the SHA-256 digest
// of the string's bytes, truncated to its first 12 bytes and rendered as 24
// lowercase hexadecimal characters.
func generateDocID(urlStr string) string {
	// idPrefixBytes is how many leading digest bytes make up the identifier.
	const idPrefixBytes = 12

	hasher := sha256.New()
	hasher.Write([]byte(urlStr)) // hash.Hash.Write is documented to never return an error.
	return hex.EncodeToString(hasher.Sum(nil)[:idPrefixBytes])
}
|
||||
Reference in New Issue
Block a user