Files
Devour/pkg/vuedocs/parser.go
Tomas Dvorak 55885a0e8f first commit
2026-02-22 10:42:17 +01:00

334 lines
8.1 KiB
Go

package vuedocs
import (
"net/url"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// Parser turns HTML pages into structured reference entries and search
// results. Relative links found while parsing search results are resolved
// against baseURL.
type Parser struct {
// baseURL is the root that relative hrefs are resolved against.
baseURL string
}
// NewParser returns a Parser whose base URL is "https://vuejs.org".
func NewParser() *Parser {
	parser := new(Parser)
	parser.baseURL = "https://vuejs.org"
	return parser
}
// ParseReferencePage loads html into a goquery document and runs every
// extractor over it, collecting the results into a single Reference.
//
// docURL is stored on the Reference and handed to each extractor, which uses
// it as the prefix of the per-entry anchor links. The returned error is
// non-nil only when the markup cannot be loaded into a document.
func (p *Parser) ParseReferencePage(html string, docURL string) (*Reference, error) {
	page, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, err
	}

	// Composite-literal fields are evaluated in the order written, so the
	// timestamp is taken before any extractor runs.
	return &Reference{
		DocURL:       docURL,
		FetchedAt:    time.Now(),
		GlobalAPI:    p.extractGlobalAPI(page, docURL),
		Composition:  p.extractCompositionAPI(page, docURL),
		Options:      p.extractOptionsAPI(page, docURL),
		Directives:   p.extractDirectives(page, docURL),
		Components:   p.extractComponents(page, docURL),
		SpecialAttrs: p.extractSpecialAttrs(page, docURL),
	}, nil
}
// ParseSearchResults collects link-like elements from html and converts each
// one that has non-empty text into a SearchResult.
//
// An element's trimmed text becomes the result name. When the element has an
// href, it is resolved against the parser's base URL and the result kind is
// derived from path fragments in the href ("composition", "options",
// "directive", "component", otherwise "api"). Elements without an href keep
// an empty DocURL and Kind.
func (p *Parser) ParseSearchResults(html string) ([]*SearchResult, error) {
	page, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, err
	}

	var found []*SearchResult
	page.Find(".search-result, a[href*='/api/'], .nav-link, .api-link").Each(func(_ int, link *goquery.Selection) {
		entry := &SearchResult{}
		entry.Name = strings.TrimSpace(link.Text())

		if href, ok := link.Attr("href"); ok {
			entry.DocURL = resolveURL(p.baseURL, href)

			// Cases are checked top to bottom; the first fragment found
			// in the href decides the kind.
			switch {
			case strings.Contains(href, "/composition-"):
				entry.Kind = "composition"
			case strings.Contains(href, "/options-"):
				entry.Kind = "options"
			case strings.Contains(href, "/directive"):
				entry.Kind = "directive"
			case strings.Contains(href, "/component"):
				entry.Kind = "component"
			default:
				entry.Kind = "api"
			}
		}

		// Elements with no visible text are not useful as results.
		if entry.Name == "" {
			return
		}
		found = append(found, entry)
	})
	return found, nil
}
// extractGlobalAPI scans headings, ".api-item" elements and any element
// carrying an id for mentions of the known global API functions and returns
// one API entry per matching element.
//
// An element matches when its id equals a known name or its text contains
// one. The entry name is taken from the first "code" or ".name" descendant
// (falling back to the element itself), and the description from the first
// following sibling paragraph before the next h2/h3/h4.
//
// NOTE(review): the "[id]" selector combined with the substring test means
// any id-bearing ancestor whose text mentions a known name also matches —
// confirm that is intended.
func (p *Parser) extractGlobalAPI(doc *goquery.Document, docURL string) []*API {
	known := []string{"createApp", "createSSRApp", "defineComponent", "defineAsyncComponent", "resolveComponent", "resolveDirective", "withDirectives", "withKeys", "withModifiers"}

	var found []*API
	doc.Find("h2, h3, .api-item, [id]").Each(func(_ int, el *goquery.Selection) {
		elemID, _ := el.Attr("id")
		body := el.Text()

		matched := false
		for _, candidate := range known {
			if elemID == candidate || strings.Contains(body, candidate) {
				matched = true
				break
			}
		}
		if !matched {
			return
		}

		// Prefer a dedicated name element; otherwise use the whole element.
		label := el.Find("code, .name").First()
		if label.Length() == 0 {
			label = el
		}
		name := strings.TrimSuffix(strings.TrimSpace(label.Text()), "(")

		entry := &API{}
		entry.Name = name
		entry.DocURL = docURL + "#" + name

		// Walk the following siblings up to the next heading and keep the
		// first paragraph with non-empty text as the description.
		for sib := el.Next(); sib.Length() > 0 && !sib.Is("h2, h3, h4"); sib = sib.Next() {
			if entry.Doc == "" && sib.Is("p") {
				entry.Doc = strings.TrimSpace(sib.Text())
			}
		}

		if name == "" {
			return
		}
		found = append(found, entry)
	})
	return found
}
// extractCompositionAPI scans headings, ".api-item" elements and elements
// whose id starts with one of a few Composition API prefixes, returning one
// Composition entry per element that refers to a known function.
//
// An element is considered only if it has an id or its text contains "(".
// It matches when the id equals a known name, the id starts with "<name>-",
// or the text contains "<name>(". The entry name comes from the first "code"
// or ".name" descendant (falling back to the element text, then to the id),
// and the description from the first following sibling paragraph before the
// next h2/h3/h4.
func (p *Parser) extractCompositionAPI(doc *goquery.Document, docURL string) []*Composition {
	known := []string{"ref", "reactive", "computed", "watch", "watchEffect", "watchPostEffect", "watchSyncEffect", "onMounted", "onUpdated", "onUnmounted", "onBeforeMount", "onBeforeUpdate", "onBeforeUnmount", "onErrorCaptured", "onRenderTracked", "onRenderTriggered", "provide", "inject", "toRef", "toRefs", "toValue", "unref", "isRef", "shallowRef", "triggerRef", "customRef", "shallowReactive", "readonly", "shallowReadonly", "markRaw", "toRaw"}

	var found []*Composition
	doc.Find("h2, h3, h4, .api-item, [id^='ref'], [id^='reactive'], [id^='computed'], [id^='watch'], [id^='on']").Each(func(_ int, el *goquery.Selection) {
		elemID, _ := el.Attr("id")
		body := el.Text()

		// Without an id, only text that looks like a call is worth checking.
		if elemID == "" && !strings.Contains(body, "(") {
			return
		}

		matched := false
		for _, candidate := range known {
			if elemID == candidate || strings.HasPrefix(elemID, candidate+"-") || strings.Contains(body, candidate+"(") {
				matched = true
				break
			}
		}
		if !matched {
			return
		}

		// Prefer a dedicated name element; otherwise use the whole element.
		label := el.Find("code, .name").First()
		if label.Length() == 0 {
			label = el
		}
		name := strings.TrimSuffix(strings.TrimSpace(label.Text()), "(")
		if name == "" && elemID != "" {
			name = elemID
		}

		entry := &Composition{}
		entry.Name = name
		entry.DocURL = docURL + "#" + name

		// Walk the following siblings up to the next heading and keep the
		// first paragraph with non-empty text as the description.
		for sib := el.Next(); sib.Length() > 0 && !sib.Is("h2, h3, h4"); sib = sib.Next() {
			if entry.Doc == "" && sib.Is("p") {
				entry.Doc = strings.TrimSpace(sib.Text())
			}
		}

		if name == "" {
			return
		}
		found = append(found, entry)
	})
	return found
}
// extractOptionsAPI scans headings and ".api-item" elements for the known
// Options API names and returns one Options entry per matching element.
//
// Matching is case-insensitive. An element whose id equals an option name
// is assigned that option; otherwise the longest option name contained in
// the element text is used, so that "unmounted" is not claimed by "mounted"
// and "renderTracked" is not claimed by "render". The description is the
// first following sibling paragraph with non-empty text before the next
// h2/h3/h4. Entry.DocURL is docURL + "#" + the canonical option name.
func (p *Parser) extractOptionsAPI(doc *goquery.Document, docURL string) []*Options {
	optionNames := []string{"data", "props", "computed", "methods", "watch", "emits", "expose", "setup", "name", "components", "directives", "inheritAttrs", "extends", "mixins", "provide", "inject", "template", "render", "renderTracked", "renderTriggered", "errorCaptured", "beforeCreate", "created", "beforeMount", "mounted", "beforeUpdate", "updated", "beforeUnmount", "unmounted", "activated", "deactivated", "serverPrefetch"}

	// The element text is lower-cased before comparison, so the names must
	// be lower-cased too; comparing against the mixed-case spelling can
	// never match names such as "beforeCreate".
	lowerNames := make([]string, len(optionNames))
	for i, name := range optionNames {
		lowerNames[i] = strings.ToLower(name)
	}

	// matchOption returns the canonical option name for an element, or ""
	// when the element does not refer to any known option.
	matchOption := func(id, lowerText string) string {
		for _, name := range optionNames {
			if strings.EqualFold(id, name) {
				return name
			}
		}
		best := -1
		for i, lower := range lowerNames {
			if !strings.Contains(lowerText, lower) {
				continue
			}
			// Strictly longer wins; ties keep the earlier list entry.
			if best < 0 || len(lower) > len(lowerNames[best]) {
				best = i
			}
		}
		if best < 0 {
			return ""
		}
		return optionNames[best]
	}

	var opts []*Options
	doc.Find("h2, h3, h4, .api-item").Each(func(_ int, s *goquery.Selection) {
		id, _ := s.Attr("id")
		name := matchOption(id, strings.ToLower(s.Text()))
		if name == "" {
			return
		}

		opt := &Options{
			Name: name,
		}
		opt.DocURL = docURL + "#" + name

		// Stop at the next heading, or as soon as a description is found.
		for next := s.Next(); next.Length() > 0 && !next.Is("h2, h3, h4"); next = next.Next() {
			if next.Is("p") {
				opt.Doc = strings.TrimSpace(next.Text())
				if opt.Doc != "" {
					break
				}
			}
		}
		opts = append(opts, opt)
	})
	return opts
}
// extractDirectives scans headings and ".api-item" elements for the known
// built-in directive names and returns one Directive entry per matching
// element.
//
// An element whose id equals a directive name is assigned that directive.
// Otherwise the longest directive name contained in the element text is
// used: several names are substrings of others ("v-else" of "v-else-if",
// "v-on" of "v-once"), and taking the first substring hit would label the
// longer directive as the shorter one. The description is the first
// following sibling paragraph with non-empty text before the next h2/h3/h4.
func (p *Parser) extractDirectives(doc *goquery.Document, docURL string) []*Directive {
	directiveNames := []string{"v-bind", "v-on", "v-model", "v-if", "v-else", "v-else-if", "v-show", "v-for", "v-slot", "v-text", "v-html", "v-cloak", "v-once", "v-pre", "v-memo"}

	// matchDirective returns the directive an element refers to, or "" when
	// it refers to none.
	matchDirective := func(id, text string) string {
		for _, name := range directiveNames {
			if id == name {
				return name
			}
		}
		best := ""
		for _, name := range directiveNames {
			// Strictly longer wins; ties keep the earlier list entry.
			if len(name) > len(best) && strings.Contains(text, name) {
				best = name
			}
		}
		return best
	}

	var directives []*Directive
	doc.Find("h2, h3, h4, .api-item").Each(func(_ int, s *goquery.Selection) {
		id, _ := s.Attr("id")
		name := matchDirective(id, s.Text())
		if name == "" {
			return
		}

		dir := &Directive{
			Name: name,
		}
		dir.DocURL = docURL + "#" + name

		// Stop at the next heading, or as soon as a description is found.
		for next := s.Next(); next.Length() > 0 && !next.Is("h2, h3, h4"); next = next.Next() {
			if next.Is("p") {
				dir.Doc = strings.TrimSpace(next.Text())
				if dir.Doc != "" {
					break
				}
			}
		}
		directives = append(directives, dir)
	})
	return directives
}
// extractComponents scans headings and ".api-item" elements for the known
// built-in component names and returns one Component entry per matching
// element.
//
// An element whose id equals a component name (ignoring case) is assigned
// that component. Otherwise the longest component name contained in the
// element text is used, because "Transition" is a substring of
// "TransitionGroup" and taking the first substring hit would label every
// TransitionGroup element as Transition. The text comparison stays
// case-sensitive. The description is the first following sibling paragraph
// with non-empty text before the next h2/h3/h4.
func (p *Parser) extractComponents(doc *goquery.Document, docURL string) []*Component {
	componentNames := []string{"Transition", "TransitionGroup", "KeepAlive", "Teleport", "Suspense"}

	// matchComponent returns the component an element refers to, or "" when
	// it refers to none.
	matchComponent := func(id, text string) string {
		for _, name := range componentNames {
			if strings.EqualFold(id, name) {
				return name
			}
		}
		best := ""
		for _, name := range componentNames {
			// Strictly longer wins; ties keep the earlier list entry.
			if len(name) > len(best) && strings.Contains(text, name) {
				best = name
			}
		}
		return best
	}

	var components []*Component
	doc.Find("h2, h3, h4, .api-item").Each(func(_ int, s *goquery.Selection) {
		id, _ := s.Attr("id")
		name := matchComponent(id, s.Text())
		if name == "" {
			return
		}

		comp := &Component{
			Name: name,
		}
		comp.DocURL = docURL + "#" + name

		// Stop at the next heading, or as soon as a description is found.
		for next := s.Next(); next.Length() > 0 && !next.Is("h2, h3, h4"); next = next.Next() {
			if next.Is("p") {
				comp.Doc = strings.TrimSpace(next.Text())
				if comp.Doc != "" {
					break
				}
			}
		}
		components = append(components, comp)
	})
	return components
}
// extractSpecialAttrs scans headings and ".api-item" elements for the
// special attributes "key", "ref" and "is", returning one SpecialAttr entry
// per matching element.
//
// An element matches a name when its id is "<name>-attribute" or "<name>",
// or when its text contains "<name> attribute". Names are tried in list
// order and the first match wins. The description is the first following
// sibling paragraph with non-empty text before the next h2/h3/h4.
func (p *Parser) extractSpecialAttrs(doc *goquery.Document, docURL string) []*SpecialAttr {
	known := []string{"key", "ref", "is"}

	var found []*SpecialAttr
	doc.Find("h2, h3, h4, .api-item").Each(func(_ int, el *goquery.Selection) {
		elemID, _ := el.Attr("id")
		body := el.Text()

		for _, candidate := range known {
			matchesID := elemID == candidate+"-attribute" || elemID == candidate
			if !matchesID && !strings.Contains(body, candidate+" attribute") {
				continue
			}

			entry := &SpecialAttr{Name: candidate}
			entry.DocURL = docURL + "#" + candidate

			// Walk the following siblings up to the next heading and keep
			// the first paragraph with non-empty text as the description.
			for sib := el.Next(); sib.Length() > 0 && !sib.Is("h2, h3, h4"); sib = sib.Next() {
				if entry.Doc == "" && sib.Is("p") {
					entry.Doc = strings.TrimSpace(sib.Text())
				}
			}

			found = append(found, entry)
			// One entry per element: stop after the first matching name.
			return
		}
	})
	return found
}
// resolveURL turns href into an absolute URL using base as the reference
// point.
//
// An href that already carries a scheme (http, https, mailto, ...) is
// returned unchanged. Anything else is resolved against base following the
// usual relative-reference rules. If either href or base cannot be parsed,
// href is returned unchanged as a best effort.
func resolveURL(base string, href string) string {
	ref, err := url.Parse(href)
	if err != nil {
		return href
	}

	// Decide "absolute" from the parsed scheme rather than a textual "http"
	// prefix: a relative path such as "http-client/guide.html" starts with
	// "http" but still needs resolving.
	if ref.IsAbs() {
		return href
	}

	root, err := url.Parse(base)
	if err != nil {
		return href
	}
	return root.ResolveReference(ref).String()
}