mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-03 20:13:03 +00:00
first commit
This commit is contained in:
@@ -0,0 +1,212 @@
|
||||
package mcpdocs
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// Parser turns HTML documentation pages into the Server, Tool, Resource and
// Prompt values declared in this package.
type Parser struct {
	// baseURL is the origin against which relative links found on hub
	// listing pages are resolved.
	baseURL string
}
|
||||
|
||||
func NewParser() *Parser {
|
||||
return &Parser{
|
||||
baseURL: "https://hub.docker.com",
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Parser) ParseServerPage(html string, docURL string) (*Server, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
server := &Server{
|
||||
DocURL: docURL,
|
||||
FetchedAt: time.Now(),
|
||||
}
|
||||
|
||||
server.Name = p.extractServerName(doc)
|
||||
server.Description = p.extractDescription(doc)
|
||||
server.Image = p.extractImage(doc)
|
||||
server.Category = p.extractCategory(doc)
|
||||
server.Tools = p.extractTools(doc, docURL)
|
||||
server.Resources = p.extractResources(doc, docURL)
|
||||
server.Prompts = p.extractPrompts(doc, docURL)
|
||||
|
||||
return server, nil
|
||||
}
|
||||
|
||||
func (p *Parser) ParseHubPage(html string) ([]*Server, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var servers []*Server
|
||||
|
||||
doc.Find("a[href*='/mcp/server/'], .server-card, .mcp-server-item").Each(func(_ int, s *goquery.Selection) {
|
||||
server := &Server{}
|
||||
|
||||
server.Name = strings.TrimSpace(s.Find("h1, h2, h3, .name, .title").First().Text())
|
||||
server.Description = strings.TrimSpace(s.Find(".description, p").First().Text())
|
||||
|
||||
if href, exists := s.Attr("href"); exists {
|
||||
server.DocURL = resolveURL(p.baseURL, href)
|
||||
}
|
||||
|
||||
if server.Name != "" {
|
||||
servers = append(servers, server)
|
||||
}
|
||||
})
|
||||
|
||||
return servers, nil
|
||||
}
|
||||
|
||||
func (p *Parser) extractServerName(doc *goquery.Document) string {
|
||||
title := doc.Find("h1").First().Text()
|
||||
title = strings.TrimSpace(title)
|
||||
|
||||
if title == "" {
|
||||
title = doc.Find("title").First().Text()
|
||||
title = strings.TrimSpace(title)
|
||||
if idx := strings.Index(title, " | "); idx > 0 {
|
||||
title = title[:idx]
|
||||
}
|
||||
}
|
||||
|
||||
return title
|
||||
}
|
||||
|
||||
func (p *Parser) extractDescription(doc *goquery.Document) string {
|
||||
desc := doc.Find("meta[name='description']").AttrOr("content", "")
|
||||
if desc != "" {
|
||||
return desc
|
||||
}
|
||||
|
||||
desc = doc.Find(".description, .overview, .introduction, p:first-of-type").First().Text()
|
||||
return strings.TrimSpace(desc)
|
||||
}
|
||||
|
||||
func (p *Parser) extractImage(doc *goquery.Document) string {
|
||||
return doc.Find("meta[property='og:image']").AttrOr("content", "")
|
||||
}
|
||||
|
||||
func (p *Parser) extractCategory(doc *goquery.Document) string {
|
||||
return doc.Find(".category, .tag").First().Text()
|
||||
}
|
||||
|
||||
func (p *Parser) extractTools(doc *goquery.Document, docURL string) []*Tool {
|
||||
var tools []*Tool
|
||||
|
||||
doc.Find("h2:contains('Tools'), h3:contains('Tools')").Each(func(_ int, heading *goquery.Selection) {
|
||||
container := heading.Next()
|
||||
for container.Length() > 0 && !container.Is("h2, h3") {
|
||||
container.Find("li, .tool, .item").Each(func(_ int, item *goquery.Selection) {
|
||||
tool := &Tool{}
|
||||
|
||||
tool.Name = strings.TrimSpace(item.Find("code, .name, strong").First().Text())
|
||||
tool.Description = strings.TrimSpace(item.Find(".description, p").First().Text())
|
||||
tool.DocURL = docURL
|
||||
|
||||
if tool.Name != "" {
|
||||
tools = append(tools, tool)
|
||||
}
|
||||
})
|
||||
container = container.Next()
|
||||
}
|
||||
})
|
||||
|
||||
doc.Find("pre code, .code-block").Each(func(_ int, code *goquery.Selection) {
|
||||
text := code.Text()
|
||||
if strings.Contains(text, "tools") && strings.Contains(text, "name") {
|
||||
lines := strings.Split(text, "\n")
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
if strings.HasPrefix(line, "name:") || strings.Contains(line, `"name"`) {
|
||||
tool := &Tool{
|
||||
DocURL: docURL,
|
||||
}
|
||||
parts := strings.SplitN(line, ":", 2)
|
||||
if len(parts) > 1 {
|
||||
tool.Name = strings.Trim(strings.TrimSpace(parts[1]), `"`)
|
||||
}
|
||||
if tool.Name != "" {
|
||||
tools = append(tools, tool)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
return tools
|
||||
}
|
||||
|
||||
func (p *Parser) extractResources(doc *goquery.Document, docURL string) []*Resource {
|
||||
var resources []*Resource
|
||||
|
||||
doc.Find("h2:contains('Resources'), h3:contains('Resources')").Each(func(_ int, heading *goquery.Selection) {
|
||||
container := heading.Next()
|
||||
for container.Length() > 0 && !container.Is("h2, h3") {
|
||||
container.Find("li, .resource, .item").Each(func(_ int, item *goquery.Selection) {
|
||||
res := &Resource{}
|
||||
|
||||
res.Name = strings.TrimSpace(item.Find("code, .name, strong").First().Text())
|
||||
res.Description = strings.TrimSpace(item.Find(".description, p").First().Text())
|
||||
res.DocURL = docURL
|
||||
|
||||
if res.Name != "" {
|
||||
resources = append(resources, res)
|
||||
}
|
||||
})
|
||||
container = container.Next()
|
||||
}
|
||||
})
|
||||
|
||||
return resources
|
||||
}
|
||||
|
||||
func (p *Parser) extractPrompts(doc *goquery.Document, docURL string) []*Prompt {
|
||||
var prompts []*Prompt
|
||||
|
||||
doc.Find("h2:contains('Prompts'), h3:contains('Prompts')").Each(func(_ int, heading *goquery.Selection) {
|
||||
container := heading.Next()
|
||||
for container.Length() > 0 && !container.Is("h2, h3") {
|
||||
container.Find("li, .prompt, .item").Each(func(_ int, item *goquery.Selection) {
|
||||
prompt := &Prompt{}
|
||||
|
||||
prompt.Name = strings.TrimSpace(item.Find("code, .name, strong").First().Text())
|
||||
prompt.Description = strings.TrimSpace(item.Find(".description, p").First().Text())
|
||||
prompt.DocURL = docURL
|
||||
|
||||
if prompt.Name != "" {
|
||||
prompts = append(prompts, prompt)
|
||||
}
|
||||
})
|
||||
container = container.Next()
|
||||
}
|
||||
})
|
||||
|
||||
return prompts
|
||||
}
|
||||
|
||||
// resolveURL returns href as an absolute URL, resolving relative references
// against base.
//
// Surrounding whitespace is removed from href first. An href that already
// carries a scheme is returned as is. If either href or base cannot be
// parsed, the trimmed href is returned unchanged.
func resolveURL(base string, href string) string {
	// Attribute values scraped from HTML can carry stray whitespace, which
	// would otherwise be escaped into the resolved path.
	href = strings.TrimSpace(href)

	ref, err := url.Parse(href)
	if err != nil {
		return href
	}

	// Decide from the parsed scheme rather than a textual "http" prefix, so a
	// relative path such as "http-tools/page" is still resolved against base.
	if ref.IsAbs() {
		return href
	}

	baseURL, err := url.Parse(base)
	if err != nil {
		return href
	}

	return baseURL.ResolveReference(ref).String()
}
|
||||
@@ -0,0 +1,49 @@
|
||||
package mcpdocs
|
||||
|
||||
import "time"
|
||||
|
||||
type Server struct {
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description,omitempty"`
|
||||
Image string `json:"image,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
Category string `json:"category,omitempty"`
|
||||
Tools []*Tool `json:"tools,omitempty"`
|
||||
Resources []*Resource `json:"resources,omitempty"`
|
||||
Prompts []*Prompt `json:"prompts,omitempty"`
|
||||
FetchedAt time.Time `json:"fetched_at"`
|
||||
}
|
||||
|
||||
type Tool struct {
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description,omitempty"`
|
||||
InputSchema *InputSchema `json:"input_schema,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
}
|
||||
|
||||
// Resource is one resource documented for a server.
type Resource struct {
	// URI identifies the resource.
	// NOTE(review): the parser in this package does not set URI or MimeType;
	// confirm who populates them.
	URI string `json:"uri"`

	// Name is the resource's name as written in the documentation.
	Name string `json:"name"`

	// Description is the resource's summary text, when the page provides one.
	Description string `json:"description,omitempty"`

	MimeType string `json:"mime_type,omitempty"`

	// DocURL is the URL of the page the resource was found on.
	DocURL string `json:"doc_url"`
}
|
||||
|
||||
type Prompt struct {
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description,omitempty"`
|
||||
Arguments []*Argument `json:"arguments,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
}
|
||||
|
||||
// Argument is one argument of a Prompt.
type Argument struct {
	// Name is the argument's name.
	Name string `json:"name"`

	// Description is the argument's summary text, if any.
	Description string `json:"description,omitempty"`

	// Required is always serialized, including when false.
	Required bool `json:"required"`
}
|
||||
|
||||
// InputSchema is the schema attached to a Tool through its InputSchema field.
type InputSchema struct {
	// Type is the schema's type name.
	Type string `json:"type"`

	// Properties maps property names to their free-form definitions.
	Properties map[string]interface{} `json:"properties,omitempty"`

	// Required lists the names of required properties.
	Required []string `json:"required,omitempty"`
}
|
||||
Reference in New Issue
Block a user