Files
Devour/pkg/nuxtdocs/parser.go
T
Tomas Dvorak 898a3c303f update
2026-02-24 10:33:59 +01:00

253 lines
6.9 KiB
Go

package nuxtdocs
import (
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/yourorg/devour/pkg/parserutil"
)
// Parser extracts Nuxt documentation entities (components, composables,
// utilities, config keys, CLI commands and search results) from HTML pages.
type Parser struct {
// baseURL is the origin that hrefs found in search results are resolved
// against.
baseURL string
}
// NewParser returns a Parser whose base URL is set to https://nuxt.com.
func NewParser() *Parser {
	parser := new(Parser)
	parser.baseURL = "https://nuxt.com"
	return parser
}
// ParseReferencePage parses html as a single documentation page and collects
// every known component, composable, utility, config key and CLI command the
// page mentions.
//
// docURL is stored on the returned Reference and is also the base for the
// per-item anchors produced by the extract helpers. FetchedAt is the wall-clock
// time at which parsing happened. The only error returned is the one goquery
// reports while building the document; in that case the Reference is nil.
func (p *Parser) ParseReferencePage(html string, docURL string) (*Reference, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, err
	}

	// Fields are listed in the order the original assignments ran, so the
	// timestamp is still taken before any extraction work starts.
	return &Reference{
		DocURL:      docURL,
		FetchedAt:   time.Now(),
		Components:  p.extractComponents(doc, docURL),
		Composables: p.extractComposables(doc, docURL),
		Utilities:   p.extractUtilities(doc, docURL),
		Configs:     p.extractConfigs(doc, docURL),
		Commands:    p.extractCommands(doc, docURL),
	}, nil
}
// ParseSearchResults scans html for search-result and navigation elements and
// returns one SearchResult for each element whose trimmed text is non-empty.
//
// Name is the element's trimmed text. When the element carries an href
// attribute, DocURL is that href resolved against the parser's base URL and
// Kind is chosen from the path segment found in the href: "component",
// "composable", "utility", "config" or "command", falling back to "doc".
// Elements without an href keep an empty DocURL and Kind. The only error
// returned is the one goquery reports while building the document.
func (p *Parser) ParseSearchResults(html string) ([]*SearchResult, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, err
	}

	const selector = ".search-result, a[href*='/docs/'], a[href*='/api/'], .nav-link"

	var found []*SearchResult
	doc.Find(selector).Each(func(_ int, sel *goquery.Selection) {
		entry := &SearchResult{Name: strings.TrimSpace(sel.Text())}

		if link, ok := sel.Attr("href"); ok {
			entry.DocURL = resolveURL(p.baseURL, link)

			// The first matching path segment decides the kind.
			switch {
			case strings.Contains(link, "/components/"):
				entry.Kind = "component"
			case strings.Contains(link, "/composables/"):
				entry.Kind = "composable"
			case strings.Contains(link, "/utils/"):
				entry.Kind = "utility"
			case strings.Contains(link, "/config/"), strings.Contains(link, "/configuration/"):
				entry.Kind = "config"
			case strings.Contains(link, "/commands/"):
				entry.Kind = "command"
			default:
				entry.Kind = "doc"
			}
		}

		// Elements with no visible text are dropped.
		if entry.Name == "" {
			return
		}
		found = append(found, entry)
	})
	return found, nil
}
// extractComponents looks for mentions of the built-in Nuxt components in doc.
//
// Every heading (h1-h3), .api-item element and element with an id attribute is
// inspected. The first component name (in list order) that equals the element's
// id or occurs anywhere in its text produces one Component, so each inspected
// element contributes at most one entry and the same component can appear more
// than once in the result. DocURL is docURL + "#" + name. Doc is the trimmed
// text of the first non-empty <p> among the element's following siblings,
// stopping at the next h1-h3.
func (p *Parser) extractComponents(doc *goquery.Document, docURL string) []*Component {
	known := []string{
		"NuxtPage", "NuxtLayout", "NuxtLink", "NuxtLoadingIndicator",
		"NuxtErrorBoundary", "NuxtPicture", "NuxtImg", "ClientOnly", "DevOnly",
	}

	var found []*Component
	doc.Find("h1, h2, h3, .api-item, [id]").Each(func(_ int, node *goquery.Selection) {
		// A missing id simply yields "", which never equals a component name.
		anchor, _ := node.Attr("id")
		body := node.Text()

		for _, candidate := range known {
			// A "<Name" occurrence is already covered by the plain substring
			// check, so no separate test for the tag form is needed.
			if anchor != candidate && !strings.Contains(body, candidate) {
				continue
			}

			entry := &Component{Name: candidate, DocURL: docURL + "#" + candidate}
			for sib := node.Next(); sib.Length() > 0 && !sib.Is("h1, h2, h3"); sib = sib.Next() {
				if !sib.Is("p") {
					continue
				}
				// Keep looking if the paragraph is blank after trimming.
				if entry.Doc = strings.TrimSpace(sib.Text()); entry.Doc != "" {
					break
				}
			}

			found = append(found, entry)
			return // only the first matching name is reported per element
		}
	})
	return found
}
// extractComposables looks for mentions of the known Nuxt composables in doc.
//
// Every heading (h1-h3), .api-item element and <code> element is inspected. The
// first composable name (in list order) that equals the element's id, or that
// occurs in its text immediately followed by "(", produces one Composable; each
// inspected element contributes at most one entry. DocURL is
// docURL + "#" + name. Doc is the trimmed text of the first non-empty <p> among
// the element's following siblings, stopping at the next h1-h3.
func (p *Parser) extractComposables(doc *goquery.Document, docURL string) []*Composable {
	known := []string{
		"useAsyncData", "useFetch", "useLazyAsyncData", "useLazyFetch", "useNuxtData",
		"useHead", "useHeadSafe", "useSeoMeta", "useRoute", "useRouter", "useState",
		"useCookie", "useRequestURL", "useRequestEvent", "useRequestHeaders",
		"useResponseHeader", "useRuntimeConfig", "useAppConfig", "useError",
		"createError", "isNuxtError", "showError", "throwError", "clearError",
		"reloadNuxtApp", "useRequestFetch", "useHydration", "usePreviewMode",
		"onPrehydrate",
	}

	var found []*Composable
	doc.Find("h1, h2, h3, .api-item, code").Each(func(_ int, node *goquery.Selection) {
		// A missing id simply yields "", which never equals a composable name.
		anchor, _ := node.Attr("id")
		body := node.Text()

		for _, candidate := range known {
			// Text only counts when it looks like a call: name followed by "(".
			if anchor != candidate && !strings.Contains(body, candidate+"(") {
				continue
			}

			entry := &Composable{Name: candidate, DocURL: docURL + "#" + candidate}
			for sib := node.Next(); sib.Length() > 0 && !sib.Is("h1, h2, h3"); sib = sib.Next() {
				if !sib.Is("p") {
					continue
				}
				// Keep looking if the paragraph is blank after trimming.
				if entry.Doc = strings.TrimSpace(sib.Text()); entry.Doc != "" {
					break
				}
			}

			found = append(found, entry)
			return // only the first matching name is reported per element
		}
	})
	return found
}
// extractUtilities looks for mentions of the known Nuxt utility functions in
// doc.
//
// Every heading (h1-h3), .api-item element and <code> element is inspected. The
// first utility name (in list order) that equals the element's id, or that
// occurs in its text immediately followed by "(", produces one Utility; each
// inspected element contributes at most one entry. DocURL is
// docURL + "#" + name. Doc is the trimmed text of the first non-empty <p> among
// the element's following siblings, stopping at the next h1-h3.
func (p *Parser) extractUtilities(doc *goquery.Document, docURL string) []*Utility {
	known := []string{
		"navigateTo", "abortNavigation", "setPageLayout", "defineNuxtComponent",
		"defineNuxtPlugin", "definePayloadHandler", "defineNuxtRouteMiddleware",
		"definePageMeta", "defineNuxtModule", "addComponent", "addImports",
		"addPluginTemplate", "createResolver",
	}

	var found []*Utility
	doc.Find("h1, h2, h3, .api-item, code").Each(func(_ int, node *goquery.Selection) {
		// A missing id simply yields "", which never equals a utility name.
		anchor, _ := node.Attr("id")
		body := node.Text()

		for _, candidate := range known {
			// Text only counts when it looks like a call: name followed by "(".
			if anchor != candidate && !strings.Contains(body, candidate+"(") {
				continue
			}

			entry := &Utility{Name: candidate, DocURL: docURL + "#" + candidate}
			for sib := node.Next(); sib.Length() > 0 && !sib.Is("h1, h2, h3"); sib = sib.Next() {
				if !sib.Is("p") {
					continue
				}
				// Keep looking if the paragraph is blank after trimming.
				if entry.Doc = strings.TrimSpace(sib.Text()); entry.Doc != "" {
					break
				}
			}

			found = append(found, entry)
			return // only the first matching name is reported per element
		}
	})
	return found
}
// extractConfigs looks for mentions of the known nuxt.config keys in doc.
//
// Every heading (h1-h3), .api-item element and element with an id attribute is
// inspected, and each inspected element contributes at most one Config:
//
//  1. If the element's id equals a config key exactly, that key is used. This
//     takes priority so that, for example, an element with id "builder" is not
//     reported as "build" just because its text contains that shorter key.
//  2. Otherwise the first key (in list order) that occurs in the element's
//     text, compared case-insensitively, is used. This is a plain substring
//     test, so short keys such as "app" also match longer words containing
//     them.
//
// DocURL is docURL + "#" + key. Doc is the trimmed text of the first non-empty
// <p> among the element's following siblings, stopping at the next h1-h3.
func (p *Parser) extractConfigs(doc *goquery.Document, docURL string) []*Config {
	nuxtConfigs := []string{
		"app", "build", "builder", "components", "compatibilityDate", "content",
		"css", "devtools", "extends", "experimental", "features", "generate",
		"hooks", "ignore", "imports", "logLevel", "modules", "nitro",
		"optimization", "pages", "plugins", "postcss", "prepare", "rootDir",
		"runtimeConfig", "serverDir", "sourcemap", "srcDir", "ssr", "telemetry",
		"testUtils", "typescript", "vite", "vue", "watchers", "workspaceDir",
	}

	// Lower-cased keys, computed once. The element text is lower-cased before
	// searching, so the needle has to be lower-cased as well; otherwise
	// camelCase keys such as "runtimeConfig" could never match by text.
	lowered := make([]string, len(nuxtConfigs))
	for i, name := range nuxtConfigs {
		lowered[i] = strings.ToLower(name)
	}

	// pick returns the config key an element refers to, if any.
	pick := func(id, lowerText string) (string, bool) {
		for _, name := range nuxtConfigs {
			if id == name {
				return name, true
			}
		}
		for i, name := range nuxtConfigs {
			if strings.Contains(lowerText, lowered[i]) {
				return name, true
			}
		}
		return "", false
	}

	var configs []*Config
	doc.Find("h1, h2, h3, .api-item, [id]").Each(func(_ int, s *goquery.Selection) {
		// A missing id simply yields "", which never equals a config key.
		id, _ := s.Attr("id")

		name, ok := pick(id, strings.ToLower(s.Text()))
		if !ok {
			return
		}

		cfg := &Config{Name: name, DocURL: docURL + "#" + name}
		for next := s.Next(); next.Length() > 0 && !next.Is("h1, h2, h3"); next = next.Next() {
			if !next.Is("p") {
				continue
			}
			// Keep looking if the paragraph is blank after trimming.
			if cfg.Doc = strings.TrimSpace(next.Text()); cfg.Doc != "" {
				break
			}
		}
		configs = append(configs, cfg)
	})
	return configs
}
// extractCommands looks for mentions of the known nuxi CLI commands in doc.
//
// Every heading (h1-h3), .api-item element, <code> element and <pre> element is
// inspected. The first command (in list order) that matches produces one
// Command; each inspected element contributes at most one entry. A command
// matches when the element's text contains the command string, or when the
// element's id equals either the command string or its hyphenated anchor form
// ("nuxi dev" -> "nuxi-dev").
//
// DocURL is docURL + "#" + the hyphenated anchor. Doc is the trimmed text of
// the first non-empty <p> among the element's following siblings, stopping at
// the next h1-h3.
func (p *Parser) extractCommands(doc *goquery.Document, docURL string) []*Command {
	nuxtCommands := []string{
		"nuxi dev", "nuxi build", "nuxi generate", "nuxi preview", "nuxi analyze",
		"nuxi cleanup", "nuxi typecheck", "nuxi module", "nuxi info",
		"nuxi prepare", "nuxi upgrade",
	}

	// Hyphenated anchors, computed once. Every command name contains a space,
	// whereas the anchor this function links to uses hyphens, so the id has to
	// be compared against the hyphenated form for an id match to be possible.
	anchors := make([]string, len(nuxtCommands))
	for i, name := range nuxtCommands {
		anchors[i] = strings.ReplaceAll(name, " ", "-")
	}

	var commands []*Command
	doc.Find("h1, h2, h3, .api-item, code, pre").Each(func(_ int, s *goquery.Selection) {
		// A missing id simply yields "", which never equals a command or anchor.
		id, _ := s.Attr("id")
		text := s.Text()

		for i, name := range nuxtCommands {
			anchor := anchors[i]
			if id != name && id != anchor && !strings.Contains(text, name) {
				continue
			}

			cmd := &Command{Name: name, DocURL: docURL + "#" + anchor}
			for next := s.Next(); next.Length() > 0 && !next.Is("h1, h2, h3"); next = next.Next() {
				if !next.Is("p") {
					continue
				}
				// Keep looking if the paragraph is blank after trimming.
				if cmd.Doc = strings.TrimSpace(next.Text()); cmd.Doc != "" {
					break
				}
			}

			commands = append(commands, cmd)
			return // only the first matching command is reported per element
		}
	})
	return commands
}
// resolveURL resolves href against base by delegating to
// parserutil.ResolveURL and returns that helper's result unchanged.
// NOTE(review): the exact resolution rules (relative paths, absolute URLs,
// malformed input) live in parserutil and are not visible here — confirm there.
func resolveURL(base string, href string) string {
return parserutil.ResolveURL(base, href)
}