mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-03 20:13:03 +00:00
294 lines
7.3 KiB
Go
294 lines
7.3 KiB
Go
package tsdocs
|
|
|
|
import (
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
"github.com/yourorg/devour/pkg/parserutil"
|
|
)
|
|
|
|
// Parser turns HTML pages of TypeScript documentation into structured
// records.
type Parser struct {
	// baseURL is the site root handed to resolveURL when links found in
	// search results are resolved.
	baseURL string
}
|
|
|
|
func NewParser() *Parser {
|
|
return &Parser{
|
|
baseURL: "https://www.typescriptlang.org",
|
|
}
|
|
}
|
|
|
|
func (p *Parser) ParseModulePage(html string, docURL string) (*Module, error) {
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
module := &Module{
|
|
DocURL: docURL,
|
|
FetchedAt: time.Now(),
|
|
}
|
|
|
|
module.Name = p.extractModuleName(doc)
|
|
module.Doc = p.extractModuleDoc(doc)
|
|
module.Interfaces = p.extractInterfaces(doc, module.Name, docURL)
|
|
module.Types = p.extractTypeAliases(doc, module.Name, docURL)
|
|
module.Functions = p.extractFunctions(doc, module.Name, docURL)
|
|
module.Classes = p.extractClasses(doc, module.Name, docURL)
|
|
module.Enums = p.extractEnums(doc, module.Name, docURL)
|
|
module.Variables = p.extractVariables(doc, module.Name, docURL)
|
|
|
|
return module, nil
|
|
}
|
|
|
|
func (p *Parser) ParseSearchResults(html string) ([]*SearchResult, error) {
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var results []*SearchResult
|
|
|
|
doc.Find(".search-result, .ais-Hits-item, li.result").Each(func(i int, s *goquery.Selection) {
|
|
result := &SearchResult{}
|
|
|
|
link := s.Find("a").First()
|
|
result.Name = strings.TrimSpace(link.Text())
|
|
|
|
if href, exists := link.Attr("href"); exists {
|
|
result.DocURL = resolveURL(p.baseURL, href)
|
|
}
|
|
|
|
result.Doc = strings.TrimSpace(s.Find(".summary, p, .description").First().Text())
|
|
|
|
if s.HasClass("interface") || strings.Contains(s.Text(), "interface") {
|
|
result.Kind = "interface"
|
|
} else if s.HasClass("type") || strings.Contains(s.Text(), "type") {
|
|
result.Kind = "type"
|
|
} else if s.HasClass("function") || strings.Contains(s.Text(), "function") {
|
|
result.Kind = "function"
|
|
} else if s.HasClass("class") || strings.Contains(s.Text(), "class") {
|
|
result.Kind = "class"
|
|
} else {
|
|
result.Kind = "doc"
|
|
}
|
|
|
|
results = append(results, result)
|
|
})
|
|
|
|
return results, nil
|
|
}
|
|
|
|
func (p *Parser) extractModuleName(doc *goquery.Document) string {
|
|
title := doc.Find("h1, .title, .page-title").First().Text()
|
|
return strings.TrimSpace(title)
|
|
}
|
|
|
|
func (p *Parser) extractModuleDoc(doc *goquery.Document) string {
|
|
docblock := doc.Find(".markdown p:first-of-type, .content p:first-of-type, #main p").First()
|
|
return strings.TrimSpace(docblock.Text())
|
|
}
|
|
|
|
func (p *Parser) extractInterfaces(doc *goquery.Document, moduleName string, docURL string) []*Interface {
|
|
var interfaces []*Interface
|
|
|
|
doc.Find("h2, h3, .context-item, .api-item").Each(func(_ int, s *goquery.Selection) {
|
|
text := s.Text()
|
|
if !strings.Contains(strings.ToLower(text), "interface") {
|
|
return
|
|
}
|
|
|
|
iface := &Interface{}
|
|
|
|
nameEl := s.Find("code, .name, a").First()
|
|
if nameEl.Length() == 0 {
|
|
nameEl = s
|
|
}
|
|
iface.Name = strings.TrimSpace(nameEl.Text())
|
|
iface.Name = strings.TrimSuffix(iface.Name, "<")
|
|
iface.Name = strings.Split(iface.Name, "<")[0]
|
|
iface.Name = strings.TrimSpace(iface.Name)
|
|
|
|
if id, exists := s.Attr("id"); exists {
|
|
iface.DocURL = docURL + "#" + id
|
|
}
|
|
|
|
next := s.Next()
|
|
for next.Length() > 0 && !next.Is("h2, h3") {
|
|
if next.Is("p") && iface.Doc == "" {
|
|
iface.Doc = strings.TrimSpace(next.Text())
|
|
}
|
|
next = next.Next()
|
|
}
|
|
|
|
if iface.Name != "" && iface.Name != "interface" {
|
|
interfaces = append(interfaces, iface)
|
|
}
|
|
})
|
|
|
|
return interfaces
|
|
}
|
|
|
|
func (p *Parser) extractTypeAliases(doc *goquery.Document, moduleName string, docURL string) []*TypeAlias {
|
|
var types []*TypeAlias
|
|
|
|
doc.Find("h2, h3, .context-item, .api-item").Each(func(_ int, s *goquery.Selection) {
|
|
text := s.Text()
|
|
if !strings.Contains(strings.ToLower(text), "type") {
|
|
return
|
|
}
|
|
|
|
ta := &TypeAlias{}
|
|
|
|
nameEl := s.Find("code, .name").First()
|
|
if nameEl.Length() == 0 {
|
|
nameEl = s
|
|
}
|
|
ta.Name = strings.TrimSpace(nameEl.Text())
|
|
ta.Name = strings.TrimSuffix(ta.Name, "<")
|
|
ta.Name = strings.Split(ta.Name, "<")[0]
|
|
ta.Name = strings.TrimSpace(ta.Name)
|
|
|
|
if id, exists := s.Attr("id"); exists {
|
|
ta.DocURL = docURL + "#" + id
|
|
}
|
|
|
|
if ta.Name != "" && ta.Name != "type" {
|
|
types = append(types, ta)
|
|
}
|
|
})
|
|
|
|
return types
|
|
}
|
|
|
|
func (p *Parser) extractFunctions(doc *goquery.Document, moduleName string, docURL string) []*Function {
|
|
var functions []*Function
|
|
|
|
doc.Find("h2, h3, .context-item, .api-item, pre code").Each(func(_ int, s *goquery.Selection) {
|
|
text := s.Text()
|
|
if !strings.Contains(text, "function") && !strings.Contains(text, "(") {
|
|
return
|
|
}
|
|
|
|
fn := &Function{}
|
|
|
|
sigText := text
|
|
if idx := strings.Index(sigText, "("); idx > 0 {
|
|
prefix := sigText[:idx]
|
|
parts := strings.Fields(prefix)
|
|
if len(parts) > 0 {
|
|
fn.Name = parts[len(parts)-1]
|
|
}
|
|
fn.Signature = strings.TrimSpace(sigText)
|
|
}
|
|
|
|
if id, exists := s.Attr("id"); exists {
|
|
fn.DocURL = docURL + "#" + id
|
|
}
|
|
|
|
next := s.Next()
|
|
for next.Length() > 0 && !next.Is("h2, h3, pre") {
|
|
if next.Is("p") && fn.Doc == "" {
|
|
fn.Doc = strings.TrimSpace(next.Text())
|
|
}
|
|
next = next.Next()
|
|
}
|
|
|
|
if fn.Name != "" {
|
|
functions = append(functions, fn)
|
|
}
|
|
})
|
|
|
|
return functions
|
|
}
|
|
|
|
func (p *Parser) extractClasses(doc *goquery.Document, moduleName string, docURL string) []*Class {
|
|
var classes []*Class
|
|
|
|
doc.Find("h2, h3, .context-item, .api-item").Each(func(_ int, s *goquery.Selection) {
|
|
text := s.Text()
|
|
if !strings.Contains(strings.ToLower(text), "class") {
|
|
return
|
|
}
|
|
|
|
class := &Class{}
|
|
|
|
nameEl := s.Find("code, .name").First()
|
|
if nameEl.Length() == 0 {
|
|
nameEl = s
|
|
}
|
|
class.Name = strings.TrimSpace(nameEl.Text())
|
|
class.Name = strings.TrimSuffix(class.Name, "<")
|
|
class.Name = strings.Split(class.Name, "<")[0]
|
|
class.Name = strings.TrimSpace(class.Name)
|
|
|
|
if id, exists := s.Attr("id"); exists {
|
|
class.DocURL = docURL + "#" + id
|
|
}
|
|
|
|
if class.Name != "" && class.Name != "class" {
|
|
classes = append(classes, class)
|
|
}
|
|
})
|
|
|
|
return classes
|
|
}
|
|
|
|
func (p *Parser) extractEnums(doc *goquery.Document, moduleName string, docURL string) []*Enum {
|
|
var enums []*Enum
|
|
|
|
doc.Find("h2, h3, .context-item").Each(func(_ int, s *goquery.Selection) {
|
|
text := s.Text()
|
|
if !strings.Contains(strings.ToLower(text), "enum") {
|
|
return
|
|
}
|
|
|
|
enum := &Enum{}
|
|
|
|
nameEl := s.Find("code, .name").First()
|
|
if nameEl.Length() == 0 {
|
|
nameEl = s
|
|
}
|
|
enum.Name = strings.TrimSpace(nameEl.Text())
|
|
|
|
if id, exists := s.Attr("id"); exists {
|
|
enum.DocURL = docURL + "#" + id
|
|
}
|
|
|
|
if enum.Name != "" && enum.Name != "enum" {
|
|
enums = append(enums, enum)
|
|
}
|
|
})
|
|
|
|
return enums
|
|
}
|
|
|
|
func (p *Parser) extractVariables(doc *goquery.Document, moduleName string, docURL string) []*Variable {
|
|
var variables []*Variable
|
|
|
|
doc.Find("pre code").Each(func(_ int, s *goquery.Selection) {
|
|
text := s.Text()
|
|
if strings.Contains(text, "const ") || strings.Contains(text, "let ") || strings.Contains(text, "var ") {
|
|
v := &Variable{}
|
|
|
|
if idx := strings.Index(text, "="); idx > 0 {
|
|
decl := text[:idx]
|
|
decl = strings.TrimPrefix(decl, "const")
|
|
decl = strings.TrimPrefix(decl, "let")
|
|
decl = strings.TrimPrefix(decl, "var")
|
|
v.Name = strings.TrimSpace(decl)
|
|
}
|
|
|
|
if v.Name != "" {
|
|
variables = append(variables, v)
|
|
}
|
|
}
|
|
})
|
|
|
|
return variables
|
|
}
|
|
|
|
func resolveURL(base string, href string) string {
|
|
return parserutil.ResolveURL(base, href)
|
|
}
|