mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-03 20:13:03 +00:00
first commit
This commit is contained in:
@@ -0,0 +1,286 @@
|
||||
package reactdocs
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// Parser extracts structured React documentation data from HTML pages.
type Parser struct {
	// baseURL is the origin that relative links found in search results are
	// resolved against.
	baseURL string
}
|
||||
|
||||
func NewParser() *Parser {
|
||||
return &Parser{
|
||||
baseURL: "https://react.dev",
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Parser) ParseReferencePage(html string, docURL string) (*Reference, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ref := &Reference{
|
||||
DocURL: docURL,
|
||||
FetchedAt: time.Now(),
|
||||
}
|
||||
|
||||
ref.Hooks = p.extractHooks(doc, docURL)
|
||||
ref.Components = p.extractComponents(doc, docURL)
|
||||
ref.APIs = p.extractAPIs(doc, docURL)
|
||||
ref.Directives = p.extractDirectives(doc, docURL)
|
||||
|
||||
return ref, nil
|
||||
}
|
||||
|
||||
func (p *Parser) ParseSearchResults(html string) ([]*SearchResult, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var results []*SearchResult
|
||||
|
||||
doc.Find(".search-result, a[href*='/reference/'], .nav-link").Each(func(i int, s *goquery.Selection) {
|
||||
result := &SearchResult{}
|
||||
|
||||
result.Name = strings.TrimSpace(s.Text())
|
||||
|
||||
if href, exists := s.Attr("href"); exists {
|
||||
result.DocURL = resolveURL(p.baseURL, href)
|
||||
|
||||
if strings.Contains(href, "/hooks/") {
|
||||
result.Kind = "hook"
|
||||
} else if strings.Contains(href, "/components/") {
|
||||
result.Kind = "component"
|
||||
} else if strings.Contains(href, "/apis/") {
|
||||
result.Kind = "api"
|
||||
} else {
|
||||
result.Kind = "doc"
|
||||
}
|
||||
}
|
||||
|
||||
if result.Name != "" {
|
||||
results = append(results, result)
|
||||
}
|
||||
})
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (p *Parser) extractHooks(doc *goquery.Document, docURL string) []*Hook {
|
||||
var hooks []*Hook
|
||||
|
||||
doc.Find("h2, h3, .api-item, [id^='use']").Each(func(_ int, s *goquery.Selection) {
|
||||
id, _ := s.Attr("id")
|
||||
text := s.Text()
|
||||
|
||||
if !strings.HasPrefix(id, "use") && !strings.Contains(text, "use") {
|
||||
return
|
||||
}
|
||||
|
||||
hook := &Hook{}
|
||||
|
||||
nameEl := s.Find("code, .name").First()
|
||||
if nameEl.Length() == 0 {
|
||||
nameEl = s
|
||||
}
|
||||
hook.Name = strings.TrimSpace(nameEl.Text())
|
||||
hook.Name = strings.TrimSuffix(hook.Name, "(")
|
||||
|
||||
if strings.HasPrefix(hook.Name, "use") {
|
||||
hook.DocURL = docURL + "#" + hook.Name
|
||||
|
||||
next := s.Next()
|
||||
for next.Length() > 0 && !next.Is("h2, h3") {
|
||||
if next.Is("p") && hook.Doc == "" {
|
||||
hook.Doc = strings.TrimSpace(next.Text())
|
||||
} else if next.Is("pre, code") {
|
||||
sig := strings.TrimSpace(next.Text())
|
||||
if strings.HasPrefix(sig, hook.Name) {
|
||||
hook.Signature = sig
|
||||
}
|
||||
}
|
||||
next = next.Next()
|
||||
}
|
||||
|
||||
hooks = append(hooks, hook)
|
||||
}
|
||||
})
|
||||
|
||||
return hooks
|
||||
}
|
||||
|
||||
func (p *Parser) extractComponents(doc *goquery.Document, docURL string) []*Component {
|
||||
var components []*Component
|
||||
|
||||
doc.Find("h2, h3, .api-item").Each(func(_ int, s *goquery.Selection) {
|
||||
id, _ := s.Attr("id")
|
||||
text := s.Text()
|
||||
|
||||
componentNames := []string{"Fragment", "Profiler", "StrictMode", "Suspense", "Transition", "Portal", "Component"}
|
||||
isComponent := false
|
||||
for _, name := range componentNames {
|
||||
if id == name || strings.Contains(text, name) {
|
||||
isComponent = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !isComponent {
|
||||
return
|
||||
}
|
||||
|
||||
comp := &Component{}
|
||||
|
||||
nameEl := s.Find("code, .name").First()
|
||||
if nameEl.Length() == 0 {
|
||||
nameEl = s
|
||||
}
|
||||
comp.Name = strings.TrimSpace(nameEl.Text())
|
||||
|
||||
comp.DocURL = docURL + "#" + comp.Name
|
||||
|
||||
next := s.Next()
|
||||
for next.Length() > 0 && !next.Is("h2, h3") {
|
||||
if next.Is("p") && comp.Doc == "" {
|
||||
comp.Doc = strings.TrimSpace(next.Text())
|
||||
}
|
||||
next = next.Next()
|
||||
}
|
||||
|
||||
if comp.Name != "" {
|
||||
components = append(components, comp)
|
||||
}
|
||||
})
|
||||
|
||||
return components
|
||||
}
|
||||
|
||||
func (p *Parser) extractAPIs(doc *goquery.Document, docURL string) []*API {
|
||||
var apis []*API
|
||||
|
||||
doc.Find("h2, h3, .api-item").Each(func(_ int, s *goquery.Selection) {
|
||||
id, _ := s.Attr("id")
|
||||
text := s.Text()
|
||||
|
||||
apiNames := []string{"createContext", "createElement", "createFactory", "createRef", "forwardRef", "isValidElement", "lazy", "memo", "startTransition", "cloneElement", "Children"}
|
||||
isAPI := false
|
||||
for _, name := range apiNames {
|
||||
if id == name || strings.Contains(text, name) {
|
||||
isAPI = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !isAPI {
|
||||
return
|
||||
}
|
||||
|
||||
api := &API{}
|
||||
|
||||
nameEl := s.Find("code, .name").First()
|
||||
if nameEl.Length() == 0 {
|
||||
nameEl = s
|
||||
}
|
||||
api.Name = strings.TrimSpace(nameEl.Text())
|
||||
api.Name = strings.TrimSuffix(api.Name, "(")
|
||||
|
||||
api.DocURL = docURL + "#" + api.Name
|
||||
|
||||
next := s.Next()
|
||||
for next.Length() > 0 && !next.Is("h2, h3") {
|
||||
if next.Is("p") && api.Doc == "" {
|
||||
api.Doc = strings.TrimSpace(next.Text())
|
||||
} else if next.Is("pre, code") {
|
||||
sig := strings.TrimSpace(next.Text())
|
||||
if strings.HasPrefix(sig, api.Name) || strings.Contains(sig, api.Name) {
|
||||
api.Signature = sig
|
||||
}
|
||||
}
|
||||
next = next.Next()
|
||||
}
|
||||
|
||||
if api.Name != "" {
|
||||
apis = append(apis, api)
|
||||
}
|
||||
})
|
||||
|
||||
return apis
|
||||
}
|
||||
|
||||
func (p *Parser) extractDirectives(doc *goquery.Document, docURL string) []*Directive {
|
||||
var directives []*Directive
|
||||
|
||||
directiveNames := []string{"use client", "use server"}
|
||||
|
||||
doc.Find("h2, h3, code, pre").Each(func(_ int, s *goquery.Selection) {
|
||||
text := strings.TrimSpace(s.Text())
|
||||
|
||||
for _, name := range directiveNames {
|
||||
if text == name || strings.Contains(text, "'"+name+"'") {
|
||||
dir := &Directive{
|
||||
Name: name,
|
||||
Usage: text,
|
||||
}
|
||||
dir.DocURL = docURL + "#" + strings.ReplaceAll(name, " ", "-")
|
||||
|
||||
next := s.Next()
|
||||
for next.Length() > 0 && !next.Is("h2, h3") {
|
||||
if next.Is("p") && dir.Doc == "" {
|
||||
dir.Doc = strings.TrimSpace(next.Text())
|
||||
}
|
||||
next = next.Next()
|
||||
}
|
||||
|
||||
directives = append(directives, dir)
|
||||
break
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
return directives
|
||||
}
|
||||
|
||||
func (p *Parser) ParseHookPage(html string, docURL string) (*Hook, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
hook := &Hook{
|
||||
DocURL: docURL,
|
||||
}
|
||||
|
||||
header := doc.Find("h1, .title").First()
|
||||
hook.Name = strings.TrimSpace(header.Text())
|
||||
|
||||
hook.Doc = strings.TrimSpace(doc.Find(".content p, main p, article p").First().Text())
|
||||
|
||||
sigEl := doc.Find("pre code, .signature, code").First()
|
||||
hook.Signature = strings.TrimSpace(sigEl.Text())
|
||||
|
||||
return hook, nil
|
||||
}
|
||||
|
||||
// resolveURL turns href into an absolute URL string relative to base.
//
// An href that already carries an http:// or https:// scheme is returned
// unchanged. Anything else is parsed and resolved against base with
// url.URL.ResolveReference. If either base or href cannot be parsed, href is
// returned as-is so callers always receive a best-effort value.
func resolveURL(base string, href string) string {
	// Match the full scheme prefix rather than just "http": a relative path
	// such as "httpdocs/page" must still be resolved against base.
	if strings.HasPrefix(href, "http://") || strings.HasPrefix(href, "https://") {
		return href
	}

	baseURL, err := url.Parse(base)
	if err != nil {
		return href
	}

	hrefURL, err := url.Parse(href)
	if err != nil {
		return href
	}

	return baseURL.ResolveReference(hrefURL).String()
}
|
||||
@@ -0,0 +1,125 @@
|
||||
package reactdocs
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// testReferencePageHTML is the HTML fixture shared by the parser tests. It
// holds an h1 title followed by four h2 sections (useState, useEffect,
// Suspense, createContext), each with a descriptive paragraph; all but
// Suspense also carry a pre/code sample.
// NOTE(review): the bare "|" and "||||" lines inside the literal look like
// diff-viewer residue rather than intended fixture content - confirm against
// the upstream file.
const testReferencePageHTML = `
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<body>
|
||||
<h1>React Reference</h1>
|
||||
|
||||
<h2 id="useState">useState</h2>
|
||||
<p>useState is a React Hook that lets you add a state variable to your component.</p>
|
||||
<pre><code>const [state, setState] = useState(initialState)</code></pre>
|
||||
|
||||
<h2 id="useEffect">useEffect</h2>
|
||||
<p>useEffect is a React Hook that lets you synchronize a component with an external system.</p>
|
||||
<pre><code>useEffect(setup, dependencies?)</code></pre>
|
||||
|
||||
<h2 id="Suspense">Suspense</h2>
|
||||
<p>Suspense lets you display a fallback until its children have finished loading.</p>
|
||||
|
||||
|
||||
<h2 id="createContext">createContext</h2>
|
||||
<p>createContext lets you create a Context that components can provide or read.</p>
|
||||
<pre><code>createContext(initialValue)</code></pre>
|
||||
</body>
|
||||
</html>
|
||||
`
|
||||
|
||||
func TestParseReferencePage(t *testing.T) {
|
||||
parser := NewParser()
|
||||
ref, err := parser.ParseReferencePage(testReferencePageHTML, "https://react.dev/reference/react")
|
||||
if err != nil {
|
||||
t.Fatalf("ParseReferencePage failed: %v", err)
|
||||
}
|
||||
|
||||
if len(ref.Hooks) == 0 {
|
||||
t.Error("Expected at least one hook")
|
||||
}
|
||||
|
||||
if len(ref.Components) == 0 {
|
||||
t.Error("Expected at least one component")
|
||||
}
|
||||
|
||||
if len(ref.APIs) == 0 {
|
||||
t.Error("Expected at least one API")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractHooks(t *testing.T) {
|
||||
parser := NewParser()
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(testReferencePageHTML))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse HTML: %v", err)
|
||||
}
|
||||
|
||||
hooks := parser.extractHooks(doc, "https://react.dev/reference/react")
|
||||
|
||||
if len(hooks) == 0 {
|
||||
t.Fatal("Expected at least one hook")
|
||||
}
|
||||
|
||||
found := false
|
||||
for _, h := range hooks {
|
||||
if h.Name == "useState" {
|
||||
found = true
|
||||
if h.Doc == "" {
|
||||
t.Error("Expected useState to have documentation")
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
t.Error("Expected to find useState hook")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractComponents(t *testing.T) {
|
||||
parser := NewParser()
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(testReferencePageHTML))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse HTML: %v", err)
|
||||
}
|
||||
|
||||
components := parser.extractComponents(doc, "https://react.dev/reference/react")
|
||||
|
||||
found := false
|
||||
for _, c := range components {
|
||||
if c.Name == "Suspense" {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
t.Error("Expected to find Suspense component")
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveURL(t *testing.T) {
|
||||
tests := []struct {
|
||||
base string
|
||||
href string
|
||||
expected string
|
||||
}{
|
||||
{"https://react.dev", "/reference/react", "https://react.dev/reference/react"},
|
||||
{"https://react.dev", "https://example.com/page", "https://example.com/page"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.href, func(t *testing.T) {
|
||||
got := resolveURL(tt.base, tt.href)
|
||||
if got != tt.expected {
|
||||
t.Errorf("resolveURL(%q, %q) = %q, want %q", tt.base, tt.href, got, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,104 @@
|
||||
// Package reactdocs provides parsing and extraction for React documentation
|
||||
// from react.dev.
|
||||
package reactdocs
|
||||
|
||||
import "time"
|
||||
|
||||
// Reference represents the React API reference.
|
||||
type Reference struct {
|
||||
Version string `json:"version"`
|
||||
DocURL string `json:"doc_url"`
|
||||
Hooks []*Hook `json:"hooks,omitempty"`
|
||||
Components []*Component `json:"components,omitempty"`
|
||||
APIs []*API `json:"apis,omitempty"`
|
||||
Directives []*Directive `json:"directives,omitempty"`
|
||||
FetchedAt time.Time `json:"fetched_at"`
|
||||
}
|
||||
|
||||
// Hook represents a React hook.
|
||||
type Hook struct {
|
||||
Name string `json:"name"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Signature string `json:"signature,omitempty"`
|
||||
Parameters []*Parameter `json:"parameters,omitempty"`
|
||||
Returns []*Return `json:"returns,omitempty"`
|
||||
Examples []*Example `json:"examples,omitempty"`
|
||||
Category string `json:"category,omitempty"` // state, effect, ref, etc.
|
||||
DocURL string `json:"doc_url"`
|
||||
Deprecated string `json:"deprecated,omitempty"`
|
||||
}
|
||||
|
||||
// Component represents a React built-in component.
|
||||
type Component struct {
|
||||
Name string `json:"name"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Props []*Prop `json:"props,omitempty"`
|
||||
Examples []*Example `json:"examples,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
Deprecated string `json:"deprecated,omitempty"`
|
||||
}
|
||||
|
||||
// Prop describes one prop accepted by a component. Name, Type and Required
// are always encoded in JSON; Doc, Default and Deprecated are omitted when
// empty.
type Prop struct {
	Name       string `json:"name"`
	Type       string `json:"type"`
	Doc        string `json:"doc,omitempty"`
	Required   bool   `json:"required"`
	Default    string `json:"default,omitempty"`
	Deprecated string `json:"deprecated,omitempty"`
}
|
||||
|
||||
// API represents a React API function.
|
||||
type API struct {
|
||||
Name string `json:"name"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Signature string `json:"signature,omitempty"`
|
||||
Parameters []*Parameter `json:"parameters,omitempty"`
|
||||
Returns []*Return `json:"returns,omitempty"`
|
||||
Examples []*Example `json:"examples,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
Deprecated string `json:"deprecated,omitempty"`
|
||||
}
|
||||
|
||||
// Directive represents a React directive (like 'use client', 'use server').
|
||||
type Directive struct {
|
||||
Name string `json:"name"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Usage string `json:"usage,omitempty"`
|
||||
Examples []*Example `json:"examples,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
}
|
||||
|
||||
// Parameter describes one parameter of a hook or API function. Name and
// Optional are always encoded in JSON; Type, Doc and Default are omitted when
// empty.
type Parameter struct {
	Name     string `json:"name"`
	Type     string `json:"type,omitempty"`
	Doc      string `json:"doc,omitempty"`
	Optional bool   `json:"optional"`
	Default  string `json:"default,omitempty"`
}
|
||||
|
||||
// Return describes one value returned by a hook or API function. Every field
// is omitted from the JSON encoding when empty.
type Return struct {
	Name string `json:"name,omitempty"`
	Type string `json:"type,omitempty"`
	Doc  string `json:"doc,omitempty"`
}
|
||||
|
||||
// Example is a code sample attached to a documented item. Code is always
// encoded in JSON; Title, Description and Language are omitted when empty.
type Example struct {
	Title       string `json:"title,omitempty"`
	Code        string `json:"code"`
	Description string `json:"description,omitempty"`
	Language    string `json:"language,omitempty"`
}
|
||||
|
||||
// SearchResult is one entry produced by a documentation search. All fields
// except Doc are always present in the JSON encoding.
type SearchResult struct {
	Name string `json:"name"`
	// Kind classifies the entry: hook, component, api, directive.
	// ParseSearchResults assigns hook, component, api or doc, and leaves it
	// empty for elements without an href.
	Kind       string `json:"kind"`
	Doc        string `json:"doc,omitempty"`
	DocURL     string `json:"doc_url"`
	Score      int    `json:"score"`
	Deprecated bool   `json:"deprecated"`
}
|
||||
Reference in New Issue
Block a user