mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-04 04:23:02 +00:00
634 lines
16 KiB
Go
634 lines
16 KiB
Go
package rustdocs
|
|
|
|
import (
|
|
"net/url"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
)
|
|
|
|
// Parser converts rustdoc-generated HTML into the package's crate, symbol and
// search-result records.
type Parser struct {
	// baseURL is the origin that relative documentation links are resolved against.
	baseURL string
}
|
|
|
|
func NewParser() *Parser {
|
|
return &Parser{
|
|
baseURL: "https://docs.rs",
|
|
}
|
|
}
|
|
|
|
func (p *Parser) ParseCratePage(html string, docURL string) (*Crate, error) {
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
crate := &Crate{
|
|
DocURL: docURL,
|
|
FetchedAt: time.Now(),
|
|
}
|
|
|
|
crate.Name = p.extractCrateName(doc)
|
|
crate.Version = p.extractVersion(doc)
|
|
crate.Description = p.extractDescription(doc)
|
|
crate.Repository = p.extractRepository(doc)
|
|
|
|
crate.Modules = p.extractModules(doc)
|
|
crate.Structs = p.extractStructs(doc)
|
|
crate.Enums = p.extractEnums(doc)
|
|
crate.Traits = p.extractTraits(doc)
|
|
crate.Functions = p.extractFunctions(doc)
|
|
crate.Macros = p.extractMacros(doc)
|
|
crate.Constants = p.extractConstants(doc)
|
|
crate.Statics = p.extractStatics(doc)
|
|
|
|
return crate, nil
|
|
}
|
|
|
|
func (p *Parser) ParseItemPage(html string, docURL string) (*Symbol, error) {
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
symbol := &Symbol{
|
|
DocURL: docURL,
|
|
}
|
|
|
|
symbol.Name = p.extractItemName(doc)
|
|
symbol.Path = p.extractItemPath(doc, docURL)
|
|
symbol.Kind = p.extractItemKind(doc)
|
|
symbol.Signature = p.extractItemSignature(doc)
|
|
symbol.Doc = p.extractItemDoc(doc)
|
|
|
|
return symbol, nil
|
|
}
|
|
|
|
func (p *Parser) ParseSearchResults(html string) ([]*SearchResult, error) {
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var results []*SearchResult
|
|
|
|
doc.Find("#results .search-results a").Each(func(i int, s *goquery.Selection) {
|
|
result := &SearchResult{}
|
|
|
|
classes, _ := s.Attr("class")
|
|
result.Kind = extractKindFromClasses(classes)
|
|
|
|
nameEl := s.Find(".result-name")
|
|
result.Name = strings.TrimSpace(nameEl.Find(".method, .struct, .fn, .trait, .enum, .mod, .macro, .const, .static, .attr").Text())
|
|
if result.Name == "" {
|
|
nameText := nameEl.Text()
|
|
result.Name = strings.TrimSpace(strings.Split(nameText, "\n")[0])
|
|
}
|
|
|
|
var pathParts []string
|
|
nameEl.Find(".path span").Each(func(_ int, span *goquery.Selection) {
|
|
part := strings.TrimSpace(span.Text())
|
|
if part != "" {
|
|
pathParts = append(pathParts, part)
|
|
}
|
|
})
|
|
result.Path = strings.Join(pathParts, "::")
|
|
|
|
result.Description = strings.TrimSpace(s.Find(".desc").Text())
|
|
|
|
if href, exists := s.Attr("href"); exists {
|
|
if strings.HasPrefix(href, "http") {
|
|
result.DocURL = href
|
|
} else {
|
|
u, err := url.Parse("https://docs.rs")
|
|
if err == nil {
|
|
u.Path = href
|
|
result.DocURL = u.String()
|
|
}
|
|
}
|
|
}
|
|
|
|
stabilityEl := s.Find(".stab")
|
|
if stabilityEl.Length() > 0 {
|
|
if stabilityEl.HasClass("unstable") || stabilityEl.HasClass("experimental") {
|
|
result.IsExperimental = true
|
|
}
|
|
result.Stability = strings.TrimSpace(stabilityEl.Text())
|
|
}
|
|
|
|
results = append(results, result)
|
|
})
|
|
|
|
return results, nil
|
|
}
|
|
|
|
func (p *Parser) extractCrateName(doc *goquery.Document) string {
|
|
title := doc.Find(".main-heading h1").Text()
|
|
title = strings.TrimSpace(title)
|
|
|
|
if strings.HasPrefix(title, "Crate ") {
|
|
return strings.TrimPrefix(title, "Crate ")
|
|
}
|
|
if strings.HasPrefix(title, "Module ") {
|
|
return strings.TrimPrefix(title, "Module ")
|
|
}
|
|
|
|
h1 := doc.Find("h1").First().Text()
|
|
h1 = strings.TrimSpace(h1)
|
|
if strings.HasPrefix(h1, "Crate ") {
|
|
return strings.TrimPrefix(h1, "Crate ")
|
|
}
|
|
|
|
return title
|
|
}
|
|
|
|
func (p *Parser) extractVersion(doc *goquery.Document) string {
|
|
since := doc.Find(".since").Text()
|
|
if since != "" {
|
|
re := regexp.MustCompile(`\d+\.\d+\.\d+`)
|
|
if match := re.FindString(since); match != "" {
|
|
return match
|
|
}
|
|
}
|
|
|
|
subHeading := doc.Find(".sub-heading").Text()
|
|
re := regexp.MustCompile(`v?(\d+\.\d+\.\d+)`)
|
|
if match := re.FindStringSubmatch(subHeading); len(match) > 1 {
|
|
return match[1]
|
|
}
|
|
|
|
return ""
|
|
}
|
|
|
|
func (p *Parser) extractDescription(doc *goquery.Document) string {
|
|
topDoc := doc.Find(".top-doc .docblock").First()
|
|
if topDoc.Length() > 0 {
|
|
return strings.TrimSpace(topDoc.Text())
|
|
}
|
|
|
|
topDoc = doc.Find(".docblock").First()
|
|
if topDoc.Length() > 0 {
|
|
return strings.TrimSpace(topDoc.Text())
|
|
}
|
|
|
|
return ""
|
|
}
|
|
|
|
func (p *Parser) extractRepository(doc *goquery.Document) string {
|
|
srcLink := doc.Find("a.src")
|
|
if srcLink.Length() > 0 {
|
|
if href, exists := srcLink.Attr("href"); exists {
|
|
if strings.Contains(href, "github.com") {
|
|
re := regexp.MustCompile(`https://github\.com/[^/]+/[^/]+`)
|
|
if match := re.FindString(href); match != "" {
|
|
return match
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func (p *Parser) extractItemName(doc *goquery.Document) string {
|
|
h1 := doc.Find(".main-heading h1").Text()
|
|
h1 = strings.TrimSpace(h1)
|
|
|
|
for _, prefix := range []string{"Struct ", "Enum ", "Trait ", "Fn ", "Macro ", "Const ", "Static ", "Module ", "Type "} {
|
|
if strings.HasPrefix(h1, prefix) {
|
|
return strings.TrimPrefix(h1, prefix)
|
|
}
|
|
}
|
|
|
|
return h1
|
|
}
|
|
|
|
func (p *Parser) extractItemPath(doc *goquery.Document, docURL string) string {
|
|
breadcrumbs := doc.Find(".rustdoc-breadcrumbs").Text()
|
|
breadcrumbs = strings.TrimSpace(breadcrumbs)
|
|
breadcrumbs = strings.ReplaceAll(breadcrumbs, "\n", "")
|
|
breadcrumbs = strings.ReplaceAll(breadcrumbs, " ", " ")
|
|
breadcrumbs = strings.TrimSpace(breadcrumbs)
|
|
|
|
if breadcrumbs != "" {
|
|
return breadcrumbs
|
|
}
|
|
|
|
if docURL != "" {
|
|
u, err := url.Parse(docURL)
|
|
if err == nil {
|
|
path := strings.TrimPrefix(u.Path, "/")
|
|
path = strings.TrimSuffix(path, "/index.html")
|
|
path = strings.TrimSuffix(path, ".html")
|
|
path = strings.ReplaceAll(path, "/", "::")
|
|
return path
|
|
}
|
|
}
|
|
|
|
return ""
|
|
}
|
|
|
|
func (p *Parser) extractItemKind(doc *goquery.Document) ItemKind {
|
|
h1 := doc.Find(".main-heading h1 span").First()
|
|
if h1.Length() > 0 {
|
|
class, _ := h1.Attr("class")
|
|
switch {
|
|
case strings.Contains(class, "struct"):
|
|
return ItemKindStruct
|
|
case strings.Contains(class, "enum"):
|
|
return ItemKindEnum
|
|
case strings.Contains(class, "trait"):
|
|
return ItemKindTrait
|
|
case strings.Contains(class, "fn"):
|
|
return ItemKindFn
|
|
case strings.Contains(class, "macro"):
|
|
return ItemKindMacro
|
|
case strings.Contains(class, "const"):
|
|
return ItemKindConst
|
|
case strings.Contains(class, "static"):
|
|
return ItemKindStatic
|
|
case strings.Contains(class, "mod"):
|
|
return ItemKindMod
|
|
case strings.Contains(class, "type"):
|
|
return ItemKindType
|
|
}
|
|
}
|
|
|
|
title := doc.Find(".main-heading h1").Text()
|
|
switch {
|
|
case strings.HasPrefix(title, "Struct "):
|
|
return ItemKindStruct
|
|
case strings.HasPrefix(title, "Enum "):
|
|
return ItemKindEnum
|
|
case strings.HasPrefix(title, "Trait "):
|
|
return ItemKindTrait
|
|
case strings.HasPrefix(title, "Fn ") || strings.HasPrefix(title, "Function "):
|
|
return ItemKindFn
|
|
case strings.HasPrefix(title, "Macro "):
|
|
return ItemKindMacro
|
|
case strings.HasPrefix(title, "Const "):
|
|
return ItemKindConst
|
|
case strings.HasPrefix(title, "Static "):
|
|
return ItemKindStatic
|
|
case strings.HasPrefix(title, "Module "):
|
|
return ItemKindMod
|
|
}
|
|
|
|
return ""
|
|
}
|
|
|
|
func (p *Parser) extractItemSignature(doc *goquery.Document) string {
|
|
sig := doc.Find("pre.rust.item-decl").Text()
|
|
sig = strings.TrimSpace(sig)
|
|
if sig != "" {
|
|
return sig
|
|
}
|
|
|
|
sig = doc.Find("pre.rust").First().Text()
|
|
return strings.TrimSpace(sig)
|
|
}
|
|
|
|
func (p *Parser) extractItemDoc(doc *goquery.Document) string {
|
|
docblock := doc.Find(".top-doc .docblock").First()
|
|
if docblock.Length() > 0 {
|
|
return strings.TrimSpace(docblock.Text())
|
|
}
|
|
|
|
docblock = doc.Find(".docblock").First()
|
|
if docblock.Length() > 0 {
|
|
return strings.TrimSpace(docblock.Text())
|
|
}
|
|
|
|
return ""
|
|
}
|
|
|
|
func (p *Parser) extractModules(doc *goquery.Document) []*Module {
|
|
var modules []*Module
|
|
|
|
doc.Find(".item-table .mod, .module-item .mod").Each(func(_ int, s *goquery.Selection) {
|
|
mod := &Module{}
|
|
|
|
mod.Name = strings.TrimSpace(s.Find("a.mod").Text())
|
|
if mod.Name == "" {
|
|
mod.Name = strings.TrimSpace(s.Find("a").First().Text())
|
|
}
|
|
|
|
if href, exists := s.Find("a").First().Attr("href"); exists {
|
|
mod.DocURL = resolveURL(p.baseURL, href)
|
|
}
|
|
|
|
mod.Doc = strings.TrimSpace(s.Find(".desc, .item-desc").Text())
|
|
|
|
mod.IsExperimental = s.Find(".stab.unstable, .stab.experimental").Length() > 0
|
|
|
|
if mod.Name != "" {
|
|
modules = append(modules, mod)
|
|
}
|
|
})
|
|
|
|
return modules
|
|
}
|
|
|
|
func (p *Parser) extractStructs(doc *goquery.Document) []*Struct {
|
|
var structs []*Struct
|
|
|
|
doc.Find(".item-table .struct, .struct").Each(func(_ int, s *goquery.Selection) {
|
|
st := &Struct{}
|
|
|
|
st.Name = strings.TrimSpace(s.Find("a.struct").Text())
|
|
if st.Name == "" {
|
|
st.Name = strings.TrimSpace(s.Find("a").First().Text())
|
|
}
|
|
|
|
if href, exists := s.Find("a").First().Attr("href"); exists {
|
|
st.DocURL = resolveURL(p.baseURL, href)
|
|
}
|
|
|
|
st.Doc = strings.TrimSpace(s.Find(".desc, .item-desc").Text())
|
|
st.IsExperimental = s.Find(".stab.unstable, .stab.experimental").Length() > 0
|
|
|
|
if st.Name != "" {
|
|
structs = append(structs, st)
|
|
}
|
|
})
|
|
|
|
return structs
|
|
}
|
|
|
|
func (p *Parser) extractEnums(doc *goquery.Document) []*Enum {
|
|
var enums []*Enum
|
|
|
|
doc.Find(".item-table .enum, .enum").Each(func(_ int, s *goquery.Selection) {
|
|
e := &Enum{}
|
|
|
|
e.Name = strings.TrimSpace(s.Find("a.enum").Text())
|
|
if e.Name == "" {
|
|
e.Name = strings.TrimSpace(s.Find("a").First().Text())
|
|
}
|
|
|
|
if href, exists := s.Find("a").First().Attr("href"); exists {
|
|
e.DocURL = resolveURL(p.baseURL, href)
|
|
}
|
|
|
|
e.Doc = strings.TrimSpace(s.Find(".desc, .item-desc").Text())
|
|
e.IsExperimental = s.Find(".stab.unstable, .stab.experimental").Length() > 0
|
|
|
|
if e.Name != "" {
|
|
enums = append(enums, e)
|
|
}
|
|
})
|
|
|
|
return enums
|
|
}
|
|
|
|
func (p *Parser) extractTraits(doc *goquery.Document) []*Trait {
|
|
var traits []*Trait
|
|
|
|
doc.Find(".item-table .trait, .trait").Each(func(_ int, s *goquery.Selection) {
|
|
t := &Trait{}
|
|
|
|
t.Name = strings.TrimSpace(s.Find("a.trait").Text())
|
|
if t.Name == "" {
|
|
t.Name = strings.TrimSpace(s.Find("a").First().Text())
|
|
}
|
|
|
|
if href, exists := s.Find("a").First().Attr("href"); exists {
|
|
t.DocURL = resolveURL(p.baseURL, href)
|
|
}
|
|
|
|
t.Doc = strings.TrimSpace(s.Find(".desc, .item-desc").Text())
|
|
t.IsExperimental = s.Find(".stab.unstable, .stab.experimental").Length() > 0
|
|
|
|
if t.Name != "" {
|
|
traits = append(traits, t)
|
|
}
|
|
})
|
|
|
|
return traits
|
|
}
|
|
|
|
func (p *Parser) extractFunctions(doc *goquery.Document) []*Func {
|
|
var funcs []*Func
|
|
|
|
doc.Find(".item-table .fn, .fn, .function").Each(func(_ int, s *goquery.Selection) {
|
|
f := &Func{}
|
|
|
|
f.Name = strings.TrimSpace(s.Find("a.fn").Text())
|
|
if f.Name == "" {
|
|
f.Name = strings.TrimSpace(s.Find("a").First().Text())
|
|
}
|
|
|
|
if href, exists := s.Find("a").First().Attr("href"); exists {
|
|
f.DocURL = resolveURL(p.baseURL, href)
|
|
}
|
|
|
|
f.Doc = strings.TrimSpace(s.Find(".desc, .item-desc").Text())
|
|
f.IsExperimental = s.Find(".stab.unstable, .stab.experimental").Length() > 0
|
|
f.IsUnsafe = strings.Contains(s.Text(), "unsafe")
|
|
|
|
if f.Name != "" {
|
|
funcs = append(funcs, f)
|
|
}
|
|
})
|
|
|
|
return funcs
|
|
}
|
|
|
|
func (p *Parser) extractMacros(doc *goquery.Document) []*Macro {
|
|
var macros []*Macro
|
|
|
|
doc.Find(".item-table .macro, .macro").Each(func(_ int, s *goquery.Selection) {
|
|
m := &Macro{}
|
|
|
|
m.Name = strings.TrimSpace(s.Find("a.macro").Text())
|
|
if m.Name == "" {
|
|
m.Name = strings.TrimSpace(s.Find("a").First().Text())
|
|
}
|
|
|
|
if href, exists := s.Find("a").First().Attr("href"); exists {
|
|
m.DocURL = resolveURL(p.baseURL, href)
|
|
}
|
|
|
|
m.Doc = strings.TrimSpace(s.Find(".desc, .item-desc").Text())
|
|
m.IsExperimental = s.Find(".stab.unstable, .stab.experimental").Length() > 0
|
|
|
|
if m.Name != "" {
|
|
macros = append(macros, m)
|
|
}
|
|
})
|
|
|
|
return macros
|
|
}
|
|
|
|
func (p *Parser) extractConstants(doc *goquery.Document) []*Const {
|
|
var constants []*Const
|
|
|
|
doc.Find(".item-table .constant, .constant").Each(func(_ int, s *goquery.Selection) {
|
|
c := &Const{}
|
|
|
|
c.Name = strings.TrimSpace(s.Find("a.constant").Text())
|
|
if c.Name == "" {
|
|
c.Name = strings.TrimSpace(s.Find("a").First().Text())
|
|
}
|
|
|
|
if href, exists := s.Find("a").First().Attr("href"); exists {
|
|
c.DocURL = resolveURL(p.baseURL, href)
|
|
}
|
|
|
|
c.Doc = strings.TrimSpace(s.Find(".desc, .item-desc").Text())
|
|
c.IsExperimental = s.Find(".stab.unstable, .stab.experimental").Length() > 0
|
|
|
|
if c.Name != "" {
|
|
constants = append(constants, c)
|
|
}
|
|
})
|
|
|
|
return constants
|
|
}
|
|
|
|
func (p *Parser) extractStatics(doc *goquery.Document) []*Static {
|
|
var statics []*Static
|
|
|
|
doc.Find(".item-table .static, .static").Each(func(_ int, s *goquery.Selection) {
|
|
st := &Static{}
|
|
|
|
st.Name = strings.TrimSpace(s.Find("a.static").Text())
|
|
if st.Name == "" {
|
|
st.Name = strings.TrimSpace(s.Find("a").First().Text())
|
|
}
|
|
|
|
if href, exists := s.Find("a").First().Attr("href"); exists {
|
|
st.DocURL = resolveURL(p.baseURL, href)
|
|
}
|
|
|
|
st.Doc = strings.TrimSpace(s.Find(".desc, .item-desc").Text())
|
|
st.IsExperimental = s.Find(".stab.unstable, .stab.experimental").Length() > 0
|
|
|
|
if st.Name != "" {
|
|
statics = append(statics, st)
|
|
}
|
|
})
|
|
|
|
return statics
|
|
}
|
|
|
|
func (p *Parser) ExtractMethods(doc *goquery.Document) []*Method {
|
|
var methods []*Method
|
|
|
|
doc.Find(".impl-items .method-toggle, details.method-toggle").Each(func(_ int, s *goquery.Selection) {
|
|
m := &Method{}
|
|
|
|
m.Name = strings.TrimSpace(s.Find(".fn, .method, h4.code-header").Text())
|
|
if m.Name == "" {
|
|
section := s.Find("section.method")
|
|
m.Name = strings.TrimSpace(section.Find(".fn").Text())
|
|
}
|
|
|
|
sig := s.Find("pre, .code-header, h4.code-header")
|
|
m.Signature = strings.TrimSpace(sig.Text())
|
|
|
|
m.Doc = strings.TrimSpace(s.Find(".docblock").Text())
|
|
|
|
m.IsUnsafe = strings.Contains(m.Signature, "unsafe")
|
|
m.IsAsync = strings.Contains(m.Signature, "async")
|
|
m.IsConst = strings.Contains(m.Signature, "const")
|
|
|
|
m.IsExperimental = s.Find(".stab.unstable, .stab.experimental").Length() > 0
|
|
|
|
if m.Name != "" {
|
|
methods = append(methods, m)
|
|
}
|
|
})
|
|
|
|
return methods
|
|
}
|
|
|
|
func (p *Parser) ExtractStructFields(doc *goquery.Document) []*Field {
|
|
var fields []*Field
|
|
|
|
doc.Find(".struct .fields tr, .struct-member").Each(func(_ int, s *goquery.Selection) {
|
|
f := &Field{}
|
|
|
|
f.Name = strings.TrimSpace(s.Find(".structfield, td:first-child").Text())
|
|
f.Type = strings.TrimSpace(s.Find(".type, td:nth-child(2)").Text())
|
|
f.Doc = strings.TrimSpace(s.Find(".docblock, td:last-child").Text())
|
|
f.IsPub = strings.Contains(s.Text(), "pub")
|
|
|
|
if f.Name != "" {
|
|
fields = append(fields, f)
|
|
}
|
|
})
|
|
|
|
return fields
|
|
}
|
|
|
|
func (p *Parser) ExtractEnumVariants(doc *goquery.Document) []*Variant {
|
|
var variants []*Variant
|
|
|
|
doc.Find(".enum .variants li, .variant").Each(func(_ int, s *goquery.Selection) {
|
|
v := &Variant{}
|
|
|
|
v.Name = strings.TrimSpace(s.Find("a, .variant-name").Text())
|
|
if v.Name == "" {
|
|
v.Name = strings.TrimSpace(s.Text())
|
|
}
|
|
|
|
v.Doc = strings.TrimSpace(s.Find(".docblock").Text())
|
|
|
|
sig := s.Text()
|
|
v.IsTuple = strings.Contains(sig, "(") && !strings.Contains(sig, "{")
|
|
v.IsStruct = strings.Contains(sig, "{")
|
|
v.IsUnit = !v.IsTuple && !v.IsStruct
|
|
|
|
if v.Name != "" {
|
|
variants = append(variants, v)
|
|
}
|
|
})
|
|
|
|
return variants
|
|
}
|
|
|
|
// extractKindFromClasses maps a space-separated HTML class list to an item
// kind.
//
// The first class of the form "result-<kind>" with a recognised kind decides
// the result: "method" is reported as "fn", "externcrate" as "mod", and the
// other recognised kinds are returned unchanged. Classes with an unknown
// suffix are ignored. It returns "" when no class qualifies.
func extractKindFromClasses(classes string) string {
	const marker = "result-"

	for _, class := range strings.Fields(classes) {
		if !strings.HasPrefix(class, marker) {
			continue
		}

		switch suffix := class[len(marker):]; suffix {
		case "method":
			return "fn"
		case "externcrate":
			return "mod"
		case "struct", "enum", "trait", "fn", "macro", "const", "static", "mod", "type", "primitive", "keyword", "attr":
			return suffix
		}
	}

	return ""
}
|
|
|
|
// resolveURL turns href into an absolute URL.
//
// An href that already starts with "http://" or "https://" is returned
// unchanged. Any other href is resolved as a URL reference against base. If
// either base or href cannot be parsed, href is returned as given.
func resolveURL(base string, href string) string {
	// Require the full scheme prefix: a bare "http" test also matched
	// relative links such as "http/index.html" and left them unresolved.
	if strings.HasPrefix(href, "http://") || strings.HasPrefix(href, "https://") {
		return href
	}

	baseURL, err := url.Parse(base)
	if err != nil {
		return href
	}

	ref, err := url.Parse(href)
	if err != nil {
		return href
	}

	return baseURL.ResolveReference(ref).String()
}
|
|
|
|
// cleanTextWhitespacePattern matches one or more consecutive whitespace
// characters. It is compiled once rather than on every cleanText call.
var cleanTextWhitespacePattern = regexp.MustCompile(`\s+`)

// cleanText replaces every run of whitespace in text with a single space and
// trims leading and trailing whitespace from the result.
func cleanText(text string) string {
	collapsed := cleanTextWhitespacePattern.ReplaceAllString(text, " ")
	return strings.TrimSpace(collapsed)
}
|