// Package reactdocs turns documentation HTML into structured records: hooks,
// components, APIs, directives and search results. The record types
// (Reference, Hook, Component, API, Directive, SearchResult) are declared
// elsewhere in this package.
package reactdocs

import (
	"fmt"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/yourorg/devour/pkg/parserutil"
)

// Parser extracts reference data from documentation pages. baseURL is the
// base against which hrefs found in search results are resolved.
type Parser struct {
	baseURL string
}

// NewParser returns a Parser whose base URL is https://react.dev.
func NewParser() *Parser {
	return &Parser{baseURL: "https://react.dev"}
}

// ParseReferencePage parses html and collects every hook, component, API and
// directive it can find into a single Reference. docURL is stored on the
// Reference and used as the prefix of each entry's anchored DocURL.
// FetchedAt is set to the time of the call. An error is returned only when
// the HTML cannot be parsed.
func (p *Parser) ParseReferencePage(html string, docURL string) (*Reference, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, fmt.Errorf("parsing reference page %q: %w", docURL, err)
	}

	return &Reference{
		DocURL:     docURL,
		FetchedAt:  time.Now(),
		Hooks:      p.extractHooks(doc, docURL),
		Components: p.extractComponents(doc, docURL),
		APIs:       p.extractAPIs(doc, docURL),
		Directives: p.extractDirectives(doc, docURL),
	}, nil
}

// ParseSearchResults parses html and returns one SearchResult per element
// matching ".search-result", "a[href*='/reference/']" or ".nav-link" that has
// non-blank text. DocURL and Kind are filled in only for elements carrying an
// href attribute; for other elements they keep their zero values.
func (p *Parser) ParseSearchResults(html string) ([]*SearchResult, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, fmt.Errorf("parsing search results: %w", err)
	}

	var results []*SearchResult
	doc.Find(".search-result, a[href*='/reference/'], .nav-link").Each(func(_ int, s *goquery.Selection) {
		name := strings.TrimSpace(s.Text())
		if name == "" {
			// Entries without a visible name are never reported.
			return
		}

		result := &SearchResult{Name: name}
		if href, ok := s.Attr("href"); ok {
			result.DocURL = resolveURL(p.baseURL, href)
			result.Kind = kindFromHref(href)
		}
		results = append(results, result)
	})
	return results, nil
}

// kindFromHref classifies a link by the path segment it contains: "hook",
// "component", "api", or "doc" when none of the known segments is present.
func kindFromHref(href string) string {
	switch {
	case strings.Contains(href, "/hooks/"):
		return "hook"
	case strings.Contains(href, "/components/"):
		return "component"
	case strings.Contains(href, "/apis/"):
		return "api"
	default:
		return "doc"
	}
}

// extractHooks scans headings, ".api-item" elements and elements whose id
// starts with "use", and returns a Hook for each one whose name looks like a
// hook (see isHookName). Doc is the first paragraph among the following
// siblings; Signature is the last following "pre" or "code" sibling whose text
// starts with the hook name. Siblings are scanned up to the next h2/h3.
func (p *Parser) extractHooks(doc *goquery.Document, docURL string) []*Hook {
	var hooks []*Hook
	doc.Find("h2, h3, .api-item, [id^='use']").Each(func(_ int, s *goquery.Selection) {
		// A missing id is treated as the empty string.
		id, _ := s.Attr("id")
		if !strings.HasPrefix(id, "use") && !strings.Contains(s.Text(), "use") {
			return
		}

		// Cut the name at the first "(" so that "useState(initialState)"
		// becomes "useState" in both Name and the URL anchor.
		name := stripCallSuffix(entryName(s))
		if !isHookName(name) {
			// Rejects headings such as "use client" or "useful ..." that
			// merely begin with the letters "use".
			return
		}

		hook := &Hook{Name: name, DocURL: docURL + "#" + name}
		eachSectionSibling(s, func(sib *goquery.Selection) {
			switch {
			case sib.Is("p") && hook.Doc == "":
				hook.Doc = strings.TrimSpace(sib.Text())
			case sib.Is("pre, code"):
				if sig := strings.TrimSpace(sib.Text()); strings.HasPrefix(sig, name) {
					hook.Signature = sig
				}
			}
		})
		hooks = append(hooks, hook)
	})
	return hooks
}

// extractComponents scans headings and ".api-item" elements and returns a
// Component for each one whose id equals, or whose text contains, one of the
// known component names. Doc is the first paragraph among the following
// siblings up to the next h2/h3. Entries with an empty name are dropped.
func (p *Parser) extractComponents(doc *goquery.Document, docURL string) []*Component {
	componentNames := []string{"Fragment", "Profiler", "StrictMode", "Suspense", "Transition", "Portal", "Component"}

	var components []*Component
	doc.Find("h2, h3, .api-item").Each(func(_ int, s *goquery.Selection) {
		// A missing id is treated as the empty string.
		id, _ := s.Attr("id")
		if !mentionsAny(id, s.Text(), componentNames) {
			return
		}

		name := entryName(s)
		if name == "" {
			return
		}

		comp := &Component{Name: name, DocURL: docURL + "#" + name}
		eachSectionSibling(s, func(sib *goquery.Selection) {
			if sib.Is("p") && comp.Doc == "" {
				comp.Doc = strings.TrimSpace(sib.Text())
			}
		})
		components = append(components, comp)
	})
	return components
}

// extractAPIs scans headings and ".api-item" elements and returns an API for
// each one whose id equals, or whose text contains, one of the known API
// names. Doc is the first paragraph among the following siblings; Signature
// is the last following "pre" or "code" sibling whose text contains the API
// name. Siblings are scanned up to the next h2/h3. Entries with an empty name
// are dropped.
func (p *Parser) extractAPIs(doc *goquery.Document, docURL string) []*API {
	apiNames := []string{"createContext", "createElement", "createFactory", "createRef", "forwardRef", "isValidElement", "lazy", "memo", "startTransition", "cloneElement", "Children"}

	var apis []*API
	doc.Find("h2, h3, .api-item").Each(func(_ int, s *goquery.Selection) {
		// A missing id is treated as the empty string.
		id, _ := s.Attr("id")
		if !mentionsAny(id, s.Text(), apiNames) {
			return
		}

		// Cut the name at the first "(" so an argument list never ends up
		// in Name or in the URL anchor.
		name := stripCallSuffix(entryName(s))
		if name == "" {
			return
		}

		api := &API{Name: name, DocURL: docURL + "#" + name}
		eachSectionSibling(s, func(sib *goquery.Selection) {
			switch {
			case sib.Is("p") && api.Doc == "":
				api.Doc = strings.TrimSpace(sib.Text())
			case sib.Is("pre, code"):
				if sig := strings.TrimSpace(sib.Text()); strings.Contains(sig, name) {
					api.Signature = sig
				}
			}
		})
		apis = append(apis, api)
	})
	return apis
}

// extractDirectives scans headings, "code" and "pre" elements and returns a
// Directive for each one whose trimmed text is a directive name or contains
// the name in single quotes. Usage is the element's trimmed text and Doc is
// the first paragraph among the following siblings up to the next h2/h3. The
// anchor in DocURL is the directive name with spaces replaced by hyphens.
func (p *Parser) extractDirectives(doc *goquery.Document, docURL string) []*Directive {
	var directives []*Directive
	doc.Find("h2, h3, code, pre").Each(func(_ int, s *goquery.Selection) {
		text := strings.TrimSpace(s.Text())
		name, ok := matchDirective(text)
		if !ok {
			return
		}

		// The selector matches both a "pre" and any "code" inside it. When
		// the enclosing "pre" already yields this directive, the nested
		// "code" would only add a duplicate entry, so it is skipped.
		if s.Is("code") {
			if pre := s.Closest("pre"); pre.Length() > 0 {
				if outer, found := matchDirective(strings.TrimSpace(pre.Text())); found && outer == name {
					return
				}
			}
		}

		dir := &Directive{
			Name:   name,
			Usage:  text,
			DocURL: docURL + "#" + strings.ReplaceAll(name, " ", "-"),
		}
		eachSectionSibling(s, func(sib *goquery.Selection) {
			if sib.Is("p") && dir.Doc == "" {
				dir.Doc = strings.TrimSpace(sib.Text())
			}
		})
		directives = append(directives, dir)
	})
	return directives
}

// ParseHookPage parses a page dedicated to a single hook. Name is the text of
// the first "h1" or ".title" element and Doc is the first paragraph found
// under ".content", "main" or "article". Signature is taken from the first
// "pre code" element; if there is none, from the first ".signature" element;
// and only then from the first "code" element, so that inline code in the
// introduction does not shadow the signature block. docURL is stored as-is.
func (p *Parser) ParseHookPage(html string, docURL string) (*Hook, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, fmt.Errorf("parsing hook page %q: %w", docURL, err)
	}

	return &Hook{
		DocURL:    docURL,
		Name:      strings.TrimSpace(doc.Find("h1, .title").First().Text()),
		Doc:       strings.TrimSpace(doc.Find(".content p, main p, article p").First().Text()),
		Signature: firstTextBySelector(doc, "pre code", ".signature", "code"),
	}, nil
}

// resolveURL resolves href against base by delegating to parserutil.ResolveURL.
func resolveURL(base string, href string) string {
	return parserutil.ResolveURL(base, href)
}

// entryName returns the trimmed text of the first "code" or ".name"
// descendant of s, or of s itself when it has no such descendant.
func entryName(s *goquery.Selection) string {
	el := s.Find("code, .name").First()
	if el.Length() == 0 {
		el = s
	}
	return strings.TrimSpace(el.Text())
}

// stripCallSuffix drops everything from the first "(" onward and trims the
// remaining whitespace, turning "memo(Component)" or "memo(" into "memo".
func stripCallSuffix(name string) string {
	if i := strings.IndexByte(name, '('); i >= 0 {
		name = name[:i]
	}
	return strings.TrimSpace(name)
}

// isHookName reports whether name is exactly "use" or starts with "use"
// immediately followed by an uppercase ASCII letter.
func isHookName(name string) bool {
	const prefix = "use"
	if !strings.HasPrefix(name, prefix) {
		return false
	}
	if len(name) == len(prefix) {
		return true
	}
	c := name[len(prefix)]
	return c >= 'A' && c <= 'Z'
}

// mentionsAny reports whether id equals, or text contains, any of names.
func mentionsAny(id, text string, names []string) bool {
	for _, n := range names {
		if id == n || strings.Contains(text, n) {
			return true
		}
	}
	return false
}

// eachSectionSibling calls visit for every sibling following s, in order,
// stopping before the first h2 or h3.
func eachSectionSibling(s *goquery.Selection, visit func(*goquery.Selection)) {
	for sib := s.Next(); sib.Length() > 0 && !sib.Is("h2, h3"); sib = sib.Next() {
		visit(sib)
	}
}

// matchDirective returns the first directive name that text equals or
// contains in single quotes, checking "use client" before "use server".
func matchDirective(text string) (string, bool) {
	for _, name := range [...]string{"use client", "use server"} {
		if text == name || strings.Contains(text, "'"+name+"'") {
			return name, true
		}
	}
	return "", false
}

// firstTextBySelector tries each selector in the order given and returns the
// trimmed text of the first element matched by the first selector that
// matches anything, or "" when no selector matches.
func firstTextBySelector(doc *goquery.Document, selectors ...string) string {
	for _, q := range selectors {
		if el := doc.Find(q).First(); el.Length() > 0 {
			return strings.TrimSpace(el.Text())
		}
	}
	return ""
}