mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-03 20:13:03 +00:00
first commit
This commit is contained in:
@@ -0,0 +1,633 @@
|
||||
package rustdocs
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// Parser converts rustdoc-generated HTML into the data types of this package.
type Parser struct {
	// baseURL is the URL that relative hrefs found in a page are resolved
	// against when building DocURL values.
	baseURL string
}
|
||||
|
||||
func NewParser() *Parser {
|
||||
return &Parser{
|
||||
baseURL: "https://docs.rs",
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Parser) ParseCratePage(html string, docURL string) (*Crate, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
crate := &Crate{
|
||||
DocURL: docURL,
|
||||
FetchedAt: time.Now(),
|
||||
}
|
||||
|
||||
crate.Name = p.extractCrateName(doc)
|
||||
crate.Version = p.extractVersion(doc)
|
||||
crate.Description = p.extractDescription(doc)
|
||||
crate.Repository = p.extractRepository(doc)
|
||||
|
||||
crate.Modules = p.extractModules(doc)
|
||||
crate.Structs = p.extractStructs(doc)
|
||||
crate.Enums = p.extractEnums(doc)
|
||||
crate.Traits = p.extractTraits(doc)
|
||||
crate.Functions = p.extractFunctions(doc)
|
||||
crate.Macros = p.extractMacros(doc)
|
||||
crate.Constants = p.extractConstants(doc)
|
||||
crate.Statics = p.extractStatics(doc)
|
||||
|
||||
return crate, nil
|
||||
}
|
||||
|
||||
func (p *Parser) ParseItemPage(html string, docURL string) (*Symbol, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
symbol := &Symbol{
|
||||
DocURL: docURL,
|
||||
}
|
||||
|
||||
symbol.Name = p.extractItemName(doc)
|
||||
symbol.Path = p.extractItemPath(doc, docURL)
|
||||
symbol.Kind = p.extractItemKind(doc)
|
||||
symbol.Signature = p.extractItemSignature(doc)
|
||||
symbol.Doc = p.extractItemDoc(doc)
|
||||
|
||||
return symbol, nil
|
||||
}
|
||||
|
||||
func (p *Parser) ParseSearchResults(html string) ([]*SearchResult, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var results []*SearchResult
|
||||
|
||||
doc.Find("#results .search-results a").Each(func(i int, s *goquery.Selection) {
|
||||
result := &SearchResult{}
|
||||
|
||||
classes, _ := s.Attr("class")
|
||||
result.Kind = extractKindFromClasses(classes)
|
||||
|
||||
nameEl := s.Find(".result-name")
|
||||
result.Name = strings.TrimSpace(nameEl.Find(".method, .struct, .fn, .trait, .enum, .mod, .macro, .const, .static, .attr").Text())
|
||||
if result.Name == "" {
|
||||
nameText := nameEl.Text()
|
||||
result.Name = strings.TrimSpace(strings.Split(nameText, "\n")[0])
|
||||
}
|
||||
|
||||
var pathParts []string
|
||||
nameEl.Find(".path span").Each(func(_ int, span *goquery.Selection) {
|
||||
part := strings.TrimSpace(span.Text())
|
||||
if part != "" {
|
||||
pathParts = append(pathParts, part)
|
||||
}
|
||||
})
|
||||
result.Path = strings.Join(pathParts, "::")
|
||||
|
||||
result.Description = strings.TrimSpace(s.Find(".desc").Text())
|
||||
|
||||
if href, exists := s.Attr("href"); exists {
|
||||
if strings.HasPrefix(href, "http") {
|
||||
result.DocURL = href
|
||||
} else {
|
||||
u, err := url.Parse("https://docs.rs")
|
||||
if err == nil {
|
||||
u.Path = href
|
||||
result.DocURL = u.String()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stabilityEl := s.Find(".stab")
|
||||
if stabilityEl.Length() > 0 {
|
||||
if stabilityEl.HasClass("unstable") || stabilityEl.HasClass("experimental") {
|
||||
result.IsExperimental = true
|
||||
}
|
||||
result.Stability = strings.TrimSpace(stabilityEl.Text())
|
||||
}
|
||||
|
||||
results = append(results, result)
|
||||
})
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (p *Parser) extractCrateName(doc *goquery.Document) string {
|
||||
title := doc.Find(".main-heading h1").Text()
|
||||
title = strings.TrimSpace(title)
|
||||
|
||||
if strings.HasPrefix(title, "Crate ") {
|
||||
return strings.TrimPrefix(title, "Crate ")
|
||||
}
|
||||
if strings.HasPrefix(title, "Module ") {
|
||||
return strings.TrimPrefix(title, "Module ")
|
||||
}
|
||||
|
||||
h1 := doc.Find("h1").First().Text()
|
||||
h1 = strings.TrimSpace(h1)
|
||||
if strings.HasPrefix(h1, "Crate ") {
|
||||
return strings.TrimPrefix(h1, "Crate ")
|
||||
}
|
||||
|
||||
return title
|
||||
}
|
||||
|
||||
func (p *Parser) extractVersion(doc *goquery.Document) string {
|
||||
since := doc.Find(".since").Text()
|
||||
if since != "" {
|
||||
re := regexp.MustCompile(`\d+\.\d+\.\d+`)
|
||||
if match := re.FindString(since); match != "" {
|
||||
return match
|
||||
}
|
||||
}
|
||||
|
||||
subHeading := doc.Find(".sub-heading").Text()
|
||||
re := regexp.MustCompile(`v?(\d+\.\d+\.\d+)`)
|
||||
if match := re.FindStringSubmatch(subHeading); len(match) > 1 {
|
||||
return match[1]
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (p *Parser) extractDescription(doc *goquery.Document) string {
|
||||
topDoc := doc.Find(".top-doc .docblock").First()
|
||||
if topDoc.Length() > 0 {
|
||||
return strings.TrimSpace(topDoc.Text())
|
||||
}
|
||||
|
||||
topDoc = doc.Find(".docblock").First()
|
||||
if topDoc.Length() > 0 {
|
||||
return strings.TrimSpace(topDoc.Text())
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (p *Parser) extractRepository(doc *goquery.Document) string {
|
||||
srcLink := doc.Find("a.src")
|
||||
if srcLink.Length() > 0 {
|
||||
if href, exists := srcLink.Attr("href"); exists {
|
||||
if strings.Contains(href, "github.com") {
|
||||
re := regexp.MustCompile(`https://github\.com/[^/]+/[^/]+`)
|
||||
if match := re.FindString(href); match != "" {
|
||||
return match
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (p *Parser) extractItemName(doc *goquery.Document) string {
|
||||
h1 := doc.Find(".main-heading h1").Text()
|
||||
h1 = strings.TrimSpace(h1)
|
||||
|
||||
for _, prefix := range []string{"Struct ", "Enum ", "Trait ", "Fn ", "Macro ", "Const ", "Static ", "Module ", "Type "} {
|
||||
if strings.HasPrefix(h1, prefix) {
|
||||
return strings.TrimPrefix(h1, prefix)
|
||||
}
|
||||
}
|
||||
|
||||
return h1
|
||||
}
|
||||
|
||||
func (p *Parser) extractItemPath(doc *goquery.Document, docURL string) string {
|
||||
breadcrumbs := doc.Find(".rustdoc-breadcrumbs").Text()
|
||||
breadcrumbs = strings.TrimSpace(breadcrumbs)
|
||||
breadcrumbs = strings.ReplaceAll(breadcrumbs, "\n", "")
|
||||
breadcrumbs = strings.ReplaceAll(breadcrumbs, " ", " ")
|
||||
breadcrumbs = strings.TrimSpace(breadcrumbs)
|
||||
|
||||
if breadcrumbs != "" {
|
||||
return breadcrumbs
|
||||
}
|
||||
|
||||
if docURL != "" {
|
||||
u, err := url.Parse(docURL)
|
||||
if err == nil {
|
||||
path := strings.TrimPrefix(u.Path, "/")
|
||||
path = strings.TrimSuffix(path, "/index.html")
|
||||
path = strings.TrimSuffix(path, ".html")
|
||||
path = strings.ReplaceAll(path, "/", "::")
|
||||
return path
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (p *Parser) extractItemKind(doc *goquery.Document) ItemKind {
|
||||
h1 := doc.Find(".main-heading h1 span").First()
|
||||
if h1.Length() > 0 {
|
||||
class, _ := h1.Attr("class")
|
||||
switch {
|
||||
case strings.Contains(class, "struct"):
|
||||
return ItemKindStruct
|
||||
case strings.Contains(class, "enum"):
|
||||
return ItemKindEnum
|
||||
case strings.Contains(class, "trait"):
|
||||
return ItemKindTrait
|
||||
case strings.Contains(class, "fn"):
|
||||
return ItemKindFn
|
||||
case strings.Contains(class, "macro"):
|
||||
return ItemKindMacro
|
||||
case strings.Contains(class, "const"):
|
||||
return ItemKindConst
|
||||
case strings.Contains(class, "static"):
|
||||
return ItemKindStatic
|
||||
case strings.Contains(class, "mod"):
|
||||
return ItemKindMod
|
||||
case strings.Contains(class, "type"):
|
||||
return ItemKindType
|
||||
}
|
||||
}
|
||||
|
||||
title := doc.Find(".main-heading h1").Text()
|
||||
switch {
|
||||
case strings.HasPrefix(title, "Struct "):
|
||||
return ItemKindStruct
|
||||
case strings.HasPrefix(title, "Enum "):
|
||||
return ItemKindEnum
|
||||
case strings.HasPrefix(title, "Trait "):
|
||||
return ItemKindTrait
|
||||
case strings.HasPrefix(title, "Fn ") || strings.HasPrefix(title, "Function "):
|
||||
return ItemKindFn
|
||||
case strings.HasPrefix(title, "Macro "):
|
||||
return ItemKindMacro
|
||||
case strings.HasPrefix(title, "Const "):
|
||||
return ItemKindConst
|
||||
case strings.HasPrefix(title, "Static "):
|
||||
return ItemKindStatic
|
||||
case strings.HasPrefix(title, "Module "):
|
||||
return ItemKindMod
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (p *Parser) extractItemSignature(doc *goquery.Document) string {
|
||||
sig := doc.Find("pre.rust.item-decl").Text()
|
||||
sig = strings.TrimSpace(sig)
|
||||
if sig != "" {
|
||||
return sig
|
||||
}
|
||||
|
||||
sig = doc.Find("pre.rust").First().Text()
|
||||
return strings.TrimSpace(sig)
|
||||
}
|
||||
|
||||
func (p *Parser) extractItemDoc(doc *goquery.Document) string {
|
||||
docblock := doc.Find(".top-doc .docblock").First()
|
||||
if docblock.Length() > 0 {
|
||||
return strings.TrimSpace(docblock.Text())
|
||||
}
|
||||
|
||||
docblock = doc.Find(".docblock").First()
|
||||
if docblock.Length() > 0 {
|
||||
return strings.TrimSpace(docblock.Text())
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (p *Parser) extractModules(doc *goquery.Document) []*Module {
|
||||
var modules []*Module
|
||||
|
||||
doc.Find(".item-table .mod, .module-item .mod").Each(func(_ int, s *goquery.Selection) {
|
||||
mod := &Module{}
|
||||
|
||||
mod.Name = strings.TrimSpace(s.Find("a.mod").Text())
|
||||
if mod.Name == "" {
|
||||
mod.Name = strings.TrimSpace(s.Find("a").First().Text())
|
||||
}
|
||||
|
||||
if href, exists := s.Find("a").First().Attr("href"); exists {
|
||||
mod.DocURL = resolveURL(p.baseURL, href)
|
||||
}
|
||||
|
||||
mod.Doc = strings.TrimSpace(s.Find(".desc, .item-desc").Text())
|
||||
|
||||
mod.IsExperimental = s.Find(".stab.unstable, .stab.experimental").Length() > 0
|
||||
|
||||
if mod.Name != "" {
|
||||
modules = append(modules, mod)
|
||||
}
|
||||
})
|
||||
|
||||
return modules
|
||||
}
|
||||
|
||||
func (p *Parser) extractStructs(doc *goquery.Document) []*Struct {
|
||||
var structs []*Struct
|
||||
|
||||
doc.Find(".item-table .struct, .struct").Each(func(_ int, s *goquery.Selection) {
|
||||
st := &Struct{}
|
||||
|
||||
st.Name = strings.TrimSpace(s.Find("a.struct").Text())
|
||||
if st.Name == "" {
|
||||
st.Name = strings.TrimSpace(s.Find("a").First().Text())
|
||||
}
|
||||
|
||||
if href, exists := s.Find("a").First().Attr("href"); exists {
|
||||
st.DocURL = resolveURL(p.baseURL, href)
|
||||
}
|
||||
|
||||
st.Doc = strings.TrimSpace(s.Find(".desc, .item-desc").Text())
|
||||
st.IsExperimental = s.Find(".stab.unstable, .stab.experimental").Length() > 0
|
||||
|
||||
if st.Name != "" {
|
||||
structs = append(structs, st)
|
||||
}
|
||||
})
|
||||
|
||||
return structs
|
||||
}
|
||||
|
||||
func (p *Parser) extractEnums(doc *goquery.Document) []*Enum {
|
||||
var enums []*Enum
|
||||
|
||||
doc.Find(".item-table .enum, .enum").Each(func(_ int, s *goquery.Selection) {
|
||||
e := &Enum{}
|
||||
|
||||
e.Name = strings.TrimSpace(s.Find("a.enum").Text())
|
||||
if e.Name == "" {
|
||||
e.Name = strings.TrimSpace(s.Find("a").First().Text())
|
||||
}
|
||||
|
||||
if href, exists := s.Find("a").First().Attr("href"); exists {
|
||||
e.DocURL = resolveURL(p.baseURL, href)
|
||||
}
|
||||
|
||||
e.Doc = strings.TrimSpace(s.Find(".desc, .item-desc").Text())
|
||||
e.IsExperimental = s.Find(".stab.unstable, .stab.experimental").Length() > 0
|
||||
|
||||
if e.Name != "" {
|
||||
enums = append(enums, e)
|
||||
}
|
||||
})
|
||||
|
||||
return enums
|
||||
}
|
||||
|
||||
func (p *Parser) extractTraits(doc *goquery.Document) []*Trait {
|
||||
var traits []*Trait
|
||||
|
||||
doc.Find(".item-table .trait, .trait").Each(func(_ int, s *goquery.Selection) {
|
||||
t := &Trait{}
|
||||
|
||||
t.Name = strings.TrimSpace(s.Find("a.trait").Text())
|
||||
if t.Name == "" {
|
||||
t.Name = strings.TrimSpace(s.Find("a").First().Text())
|
||||
}
|
||||
|
||||
if href, exists := s.Find("a").First().Attr("href"); exists {
|
||||
t.DocURL = resolveURL(p.baseURL, href)
|
||||
}
|
||||
|
||||
t.Doc = strings.TrimSpace(s.Find(".desc, .item-desc").Text())
|
||||
t.IsExperimental = s.Find(".stab.unstable, .stab.experimental").Length() > 0
|
||||
|
||||
if t.Name != "" {
|
||||
traits = append(traits, t)
|
||||
}
|
||||
})
|
||||
|
||||
return traits
|
||||
}
|
||||
|
||||
func (p *Parser) extractFunctions(doc *goquery.Document) []*Func {
|
||||
var funcs []*Func
|
||||
|
||||
doc.Find(".item-table .fn, .fn, .function").Each(func(_ int, s *goquery.Selection) {
|
||||
f := &Func{}
|
||||
|
||||
f.Name = strings.TrimSpace(s.Find("a.fn").Text())
|
||||
if f.Name == "" {
|
||||
f.Name = strings.TrimSpace(s.Find("a").First().Text())
|
||||
}
|
||||
|
||||
if href, exists := s.Find("a").First().Attr("href"); exists {
|
||||
f.DocURL = resolveURL(p.baseURL, href)
|
||||
}
|
||||
|
||||
f.Doc = strings.TrimSpace(s.Find(".desc, .item-desc").Text())
|
||||
f.IsExperimental = s.Find(".stab.unstable, .stab.experimental").Length() > 0
|
||||
f.IsUnsafe = strings.Contains(s.Text(), "unsafe")
|
||||
|
||||
if f.Name != "" {
|
||||
funcs = append(funcs, f)
|
||||
}
|
||||
})
|
||||
|
||||
return funcs
|
||||
}
|
||||
|
||||
func (p *Parser) extractMacros(doc *goquery.Document) []*Macro {
|
||||
var macros []*Macro
|
||||
|
||||
doc.Find(".item-table .macro, .macro").Each(func(_ int, s *goquery.Selection) {
|
||||
m := &Macro{}
|
||||
|
||||
m.Name = strings.TrimSpace(s.Find("a.macro").Text())
|
||||
if m.Name == "" {
|
||||
m.Name = strings.TrimSpace(s.Find("a").First().Text())
|
||||
}
|
||||
|
||||
if href, exists := s.Find("a").First().Attr("href"); exists {
|
||||
m.DocURL = resolveURL(p.baseURL, href)
|
||||
}
|
||||
|
||||
m.Doc = strings.TrimSpace(s.Find(".desc, .item-desc").Text())
|
||||
m.IsExperimental = s.Find(".stab.unstable, .stab.experimental").Length() > 0
|
||||
|
||||
if m.Name != "" {
|
||||
macros = append(macros, m)
|
||||
}
|
||||
})
|
||||
|
||||
return macros
|
||||
}
|
||||
|
||||
func (p *Parser) extractConstants(doc *goquery.Document) []*Const {
|
||||
var constants []*Const
|
||||
|
||||
doc.Find(".item-table .constant, .constant").Each(func(_ int, s *goquery.Selection) {
|
||||
c := &Const{}
|
||||
|
||||
c.Name = strings.TrimSpace(s.Find("a.constant").Text())
|
||||
if c.Name == "" {
|
||||
c.Name = strings.TrimSpace(s.Find("a").First().Text())
|
||||
}
|
||||
|
||||
if href, exists := s.Find("a").First().Attr("href"); exists {
|
||||
c.DocURL = resolveURL(p.baseURL, href)
|
||||
}
|
||||
|
||||
c.Doc = strings.TrimSpace(s.Find(".desc, .item-desc").Text())
|
||||
c.IsExperimental = s.Find(".stab.unstable, .stab.experimental").Length() > 0
|
||||
|
||||
if c.Name != "" {
|
||||
constants = append(constants, c)
|
||||
}
|
||||
})
|
||||
|
||||
return constants
|
||||
}
|
||||
|
||||
func (p *Parser) extractStatics(doc *goquery.Document) []*Static {
|
||||
var statics []*Static
|
||||
|
||||
doc.Find(".item-table .static, .static").Each(func(_ int, s *goquery.Selection) {
|
||||
st := &Static{}
|
||||
|
||||
st.Name = strings.TrimSpace(s.Find("a.static").Text())
|
||||
if st.Name == "" {
|
||||
st.Name = strings.TrimSpace(s.Find("a").First().Text())
|
||||
}
|
||||
|
||||
if href, exists := s.Find("a").First().Attr("href"); exists {
|
||||
st.DocURL = resolveURL(p.baseURL, href)
|
||||
}
|
||||
|
||||
st.Doc = strings.TrimSpace(s.Find(".desc, .item-desc").Text())
|
||||
st.IsExperimental = s.Find(".stab.unstable, .stab.experimental").Length() > 0
|
||||
|
||||
if st.Name != "" {
|
||||
statics = append(statics, st)
|
||||
}
|
||||
})
|
||||
|
||||
return statics
|
||||
}
|
||||
|
||||
func (p *Parser) ExtractMethods(doc *goquery.Document) []*Method {
|
||||
var methods []*Method
|
||||
|
||||
doc.Find(".impl-items .method-toggle, details.method-toggle").Each(func(_ int, s *goquery.Selection) {
|
||||
m := &Method{}
|
||||
|
||||
m.Name = strings.TrimSpace(s.Find(".fn, .method, h4.code-header").Text())
|
||||
if m.Name == "" {
|
||||
section := s.Find("section.method")
|
||||
m.Name = strings.TrimSpace(section.Find(".fn").Text())
|
||||
}
|
||||
|
||||
sig := s.Find("pre, .code-header, h4.code-header")
|
||||
m.Signature = strings.TrimSpace(sig.Text())
|
||||
|
||||
m.Doc = strings.TrimSpace(s.Find(".docblock").Text())
|
||||
|
||||
m.IsUnsafe = strings.Contains(m.Signature, "unsafe")
|
||||
m.IsAsync = strings.Contains(m.Signature, "async")
|
||||
m.IsConst = strings.Contains(m.Signature, "const")
|
||||
|
||||
m.IsExperimental = s.Find(".stab.unstable, .stab.experimental").Length() > 0
|
||||
|
||||
if m.Name != "" {
|
||||
methods = append(methods, m)
|
||||
}
|
||||
})
|
||||
|
||||
return methods
|
||||
}
|
||||
|
||||
func (p *Parser) ExtractStructFields(doc *goquery.Document) []*Field {
|
||||
var fields []*Field
|
||||
|
||||
doc.Find(".struct .fields tr, .struct-member").Each(func(_ int, s *goquery.Selection) {
|
||||
f := &Field{}
|
||||
|
||||
f.Name = strings.TrimSpace(s.Find(".structfield, td:first-child").Text())
|
||||
f.Type = strings.TrimSpace(s.Find(".type, td:nth-child(2)").Text())
|
||||
f.Doc = strings.TrimSpace(s.Find(".docblock, td:last-child").Text())
|
||||
f.IsPub = strings.Contains(s.Text(), "pub")
|
||||
|
||||
if f.Name != "" {
|
||||
fields = append(fields, f)
|
||||
}
|
||||
})
|
||||
|
||||
return fields
|
||||
}
|
||||
|
||||
func (p *Parser) ExtractEnumVariants(doc *goquery.Document) []*Variant {
|
||||
var variants []*Variant
|
||||
|
||||
doc.Find(".enum .variants li, .variant").Each(func(_ int, s *goquery.Selection) {
|
||||
v := &Variant{}
|
||||
|
||||
v.Name = strings.TrimSpace(s.Find("a, .variant-name").Text())
|
||||
if v.Name == "" {
|
||||
v.Name = strings.TrimSpace(s.Text())
|
||||
}
|
||||
|
||||
v.Doc = strings.TrimSpace(s.Find(".docblock").Text())
|
||||
|
||||
sig := s.Text()
|
||||
v.IsTuple = strings.Contains(sig, "(") && !strings.Contains(sig, "{")
|
||||
v.IsStruct = strings.Contains(sig, "{")
|
||||
v.IsUnit = !v.IsTuple && !v.IsStruct
|
||||
|
||||
if v.Name != "" {
|
||||
variants = append(variants, v)
|
||||
}
|
||||
})
|
||||
|
||||
return variants
|
||||
}
|
||||
|
||||
// extractKindFromClasses maps a space-separated class attribute to an item
// kind. It returns the kind named by the first "result-<kind>" class whose
// suffix is recognised; "result-method" is reported as "fn" and
// "result-externcrate" as "mod". Unrecognised "result-*" classes are skipped.
// It returns "" when no class matches.
func extractKindFromClasses(classes string) string {
	const marker = "result-"

	for _, class := range strings.Fields(classes) {
		if !strings.HasPrefix(class, marker) {
			continue
		}

		switch suffix := class[len(marker):]; suffix {
		case "method":
			return "fn"
		case "externcrate":
			return "mod"
		case "struct", "enum", "trait", "fn", "macro", "const", "static", "mod", "type", "primitive", "keyword", "attr":
			return suffix
		}
	}

	return ""
}
|
||||
|
||||
// resolveURL resolves href against base and returns the result as a string.
//
// An href that is already absolute (has a scheme) is returned unchanged. If
// either href or base cannot be parsed, href is returned unchanged.
//
// Absoluteness is decided by parsing rather than by checking for an "http"
// prefix, so a relative link whose first segment merely starts with "http"
// (for example "http/index.html", the page of a module named http) is
// resolved like any other relative link.
func resolveURL(base string, href string) string {
	ref, err := url.Parse(href)
	if err != nil {
		return href
	}
	if ref.IsAbs() {
		return href
	}

	baseURL, err := url.Parse(base)
	if err != nil {
		return href
	}

	return baseURL.ResolveReference(ref).String()
}
|
||||
|
||||
// whitespaceRunPattern matches one or more consecutive whitespace characters.
// It is compiled once at package initialisation instead of on every call.
var whitespaceRunPattern = regexp.MustCompile(`\s+`)

// cleanText collapses every run of whitespace in text to a single space and
// removes leading and trailing whitespace.
func cleanText(text string) string {
	return strings.TrimSpace(whitespaceRunPattern.ReplaceAllString(text, " "))
}
|
||||
@@ -0,0 +1,265 @@
|
||||
package rustdocs
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// testStructPageHTML is a reduced item page for the struct `Mask`: a main
// heading with breadcrumbs, an item declaration, a top-level docblock and one
// method (`test`) inside a method-toggle. It is the fixture for
// TestParseItemPage and TestExtractMethods.
//
// NOTE(review): the `|` / `||||` lines look like residue from the viewer this
// file was copied from rather than intended fixture content — confirm against
// the repository before relying on the exact text.
const testStructPageHTML = `
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<body>
|
||||
<main>
|
||||
<div class="main-heading">
|
||||
<div class="rustdoc-breadcrumbs"><a href="../index.html">std</a>::<wbr><a href="index.html">simd</a></div>
|
||||
<h1>Struct <span class="struct">Mask</span></h1>
|
||||
<span class="sub-heading"><span class="since" title="Stable since Rust version 1.0.0">1.0.0</span></span>
|
||||
</div>
|
||||
<pre class="rust item-decl"><code>pub struct Mask<T, const N: usize>(<span class="comment">/* private fields */</span>)</code></pre>
|
||||
<details class="toggle top-doc" open="">
|
||||
<div class="docblock"><p>A SIMD vector mask for N elements.</p></div>
|
||||
</details>
|
||||
<h2 id="implementations">Implementations</h2>
|
||||
<details class="toggle implementors-toggle" open="">
|
||||
<details class="toggle method-toggle" open="">
|
||||
<section id="method.test" class="method">
|
||||
<h4 class="code-header">pub fn <a href="#method.test" class="fn">test</a>(&self, index: usize) -> bool</h4>
|
||||
</section>
|
||||
<div class="docblock"><p>Tests the value of the specified element.</p></div>
|
||||
</details>
|
||||
</details>
|
||||
</main>
|
||||
</body>
|
||||
</html>
|
||||
`
|
||||
|
||||
// testSearchHTML is a search-results fragment with three result anchors, in
// order: a method (`Mask::test`), a struct (`Vec`) and a function (`stdout`).
// It is the fixture for TestParseSearchResults.
//
// NOTE(review): the `|` / `||||` lines look like residue from the viewer this
// file was copied from rather than intended fixture content — confirm against
// the repository before relying on the exact text.
const testSearchHTML = `
|
||||
<div id="results">
|
||||
<ul class="search-results active">
|
||||
<a class="result-method" href="../std/simd/struct.Mask.html#method.test">
|
||||
<span class="result-name">
|
||||
<span class="typename">method</span>
|
||||
<div class="path"><span>std::</span><span>simd::</span><span class="method">Mask::</span><span class="fn">test</span></div>
|
||||
</span>
|
||||
<div class="desc">Tests the value of the specified element.</div>
|
||||
</a>
|
||||
<a class="result-struct" href="../std/vec/struct.Vec.html">
|
||||
<span class="result-name">
|
||||
<span class="typename">struct</span>
|
||||
<div class="path"><span>std::</span><span>vec::</span><span class="struct">Vec</span></div>
|
||||
</span>
|
||||
<div class="desc">A contiguous growable array type.</div>
|
||||
</a>
|
||||
<a class="result-fn" href="../std/io/fn.stdout.html">
|
||||
<span class="result-name">
|
||||
<span class="typename">fn</span>
|
||||
<div class="path"><span>std::</span><span>io::</span><span class="fn">stdout</span></div>
|
||||
</span>
|
||||
<div class="desc">Constructs a new handle to the standard output.</div>
|
||||
</a>
|
||||
</ul>
|
||||
</div>
|
||||
`
|
||||
|
||||
// testCratePageHTML is a reduced crate index page for `serde`: a main heading,
// a top-level docblock and one entry each under Modules, Structs and Enums.
// It is the fixture for TestParseCratePage.
//
// NOTE(review): the `|` / `||||` lines look like residue from the viewer this
// file was copied from rather than intended fixture content — confirm against
// the repository before relying on the exact text.
const testCratePageHTML = `
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<body>
|
||||
<main>
|
||||
<div class="main-heading">
|
||||
<h1>Crate <span>serde</span></h1>
|
||||
<span class="sub-heading"><span class="since">1.0.0</span></span>
|
||||
</div>
|
||||
<details class="toggle top-doc">
|
||||
<div class="docblock"><p>A framework for serializing and deserializing Rust data structures.</p></div>
|
||||
</details>
|
||||
<h2 id="modules">Modules</h2>
|
||||
<div class="item-table">
|
||||
<div class="module-item"><a class="mod" href="de/index.html">de</a><div class="desc">Deserialize implementation.</div></div>
|
||||
</div>
|
||||
<h2 id="structs">Structs</h2>
|
||||
<div class="item-table">
|
||||
<div class="struct"><a class="struct" href="struct.Serializer.html">Serializer</a><div class="desc">A structure for serializing Rust values.</div></div>
|
||||
</div>
|
||||
<h2 id="enums">Enums</h2>
|
||||
<div class="item-table">
|
||||
<div class="enum"><a class="enum" href="enum.Error.html">Error</a><div class="desc">Errors during serialization.</div></div>
|
||||
</div>
|
||||
</main>
|
||||
</body>
|
||||
</html>
|
||||
`
|
||||
|
||||
func TestParseItemPage(t *testing.T) {
|
||||
parser := NewParser()
|
||||
symbol, err := parser.ParseItemPage(testStructPageHTML, "https://docs.rs/std/simd/struct.Mask.html")
|
||||
if err != nil {
|
||||
t.Fatalf("ParseItemPage failed: %v", err)
|
||||
}
|
||||
|
||||
if symbol.Name != "Mask" {
|
||||
t.Errorf("Expected name 'Mask', got '%s'", symbol.Name)
|
||||
}
|
||||
|
||||
if symbol.Kind != ItemKindStruct {
|
||||
t.Errorf("Expected kind 'struct', got '%s'", symbol.Kind)
|
||||
}
|
||||
|
||||
if symbol.Doc == "" {
|
||||
t.Error("Expected non-empty doc")
|
||||
}
|
||||
|
||||
if !strings.Contains(symbol.Signature, "struct Mask") {
|
||||
t.Errorf("Expected signature to contain 'struct Mask', got '%s'", symbol.Signature)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSearchResults(t *testing.T) {
|
||||
parser := NewParser()
|
||||
results, err := parser.ParseSearchResults(testSearchHTML)
|
||||
if err != nil {
|
||||
t.Fatalf("ParseSearchResults failed: %v", err)
|
||||
}
|
||||
|
||||
if len(results) < 3 {
|
||||
t.Fatalf("Expected at least 3 results, got %d", len(results))
|
||||
}
|
||||
|
||||
method := results[0]
|
||||
if method.Kind != "fn" {
|
||||
t.Errorf("Expected kind 'fn' for method, got '%s'", method.Kind)
|
||||
}
|
||||
if method.Description == "" {
|
||||
t.Error("Expected non-empty description")
|
||||
}
|
||||
|
||||
structResult := results[1]
|
||||
if structResult.Kind != "struct" {
|
||||
t.Errorf("Expected kind 'struct', got '%s'", structResult.Kind)
|
||||
}
|
||||
|
||||
fnResult := results[2]
|
||||
if fnResult.Kind != "fn" {
|
||||
t.Errorf("Expected kind 'fn', got '%s'", fnResult.Kind)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseCratePage(t *testing.T) {
|
||||
parser := NewParser()
|
||||
crate, err := parser.ParseCratePage(testCratePageHTML, "https://docs.rs/serde")
|
||||
if err != nil {
|
||||
t.Fatalf("ParseCratePage failed: %v", err)
|
||||
}
|
||||
|
||||
if crate.Name == "" {
|
||||
t.Error("Expected non-empty name")
|
||||
}
|
||||
|
||||
if crate.Description == "" {
|
||||
t.Error("Expected non-empty description")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractKindFromClasses(t *testing.T) {
|
||||
tests := []struct {
|
||||
classes string
|
||||
expected string
|
||||
}{
|
||||
{"result-struct", "struct"},
|
||||
{"result-enum", "enum"},
|
||||
{"result-trait", "trait"},
|
||||
{"result-fn", "fn"},
|
||||
{"result-macro", "macro"},
|
||||
{"result-const", "const"},
|
||||
{"result-static", "static"},
|
||||
{"result-mod", "mod"},
|
||||
{"result-method", "fn"},
|
||||
{"result-externcrate", "mod"},
|
||||
{"unknown-class", ""},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.classes, func(t *testing.T) {
|
||||
got := extractKindFromClasses(tt.classes)
|
||||
if got != tt.expected {
|
||||
t.Errorf("extractKindFromClasses(%q) = %q, want %q", tt.classes, got, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveURL(t *testing.T) {
|
||||
tests := []struct {
|
||||
base string
|
||||
href string
|
||||
expected string
|
||||
}{
|
||||
{"https://docs.rs", "/serde/struct.Serializer.html", "https://docs.rs/serde/struct.Serializer.html"},
|
||||
{"https://docs.rs", "https://example.com/page", "https://example.com/page"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.href, func(t *testing.T) {
|
||||
got := resolveURL(tt.base, tt.href)
|
||||
if got != tt.expected {
|
||||
t.Errorf("resolveURL(%q, %q) = %q, want %q", tt.base, tt.href, got, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCleanText(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{" hello world ", "hello world"},
|
||||
{"single", "single"},
|
||||
{"\n\ttabs\t\n", "tabs"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.input, func(t *testing.T) {
|
||||
if got := cleanText(tt.input); got != tt.expected {
|
||||
t.Errorf("cleanText(%q) = %q, want %q", tt.input, got, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractItemPath(t *testing.T) {
|
||||
parser := NewParser()
|
||||
|
||||
html := `<div class="rustdoc-breadcrumbs"><a href="../index.html">std</a>::<a href="index.html">simd</a></div>`
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse HTML: %v", err)
|
||||
}
|
||||
|
||||
got := parser.extractItemPath(doc, "https://docs.rs/std/simd/struct.Mask.html")
|
||||
|
||||
if !strings.Contains(got, "std") || !strings.Contains(got, "simd") {
|
||||
t.Errorf("extractItemPath() = %q, expected to contain std and simd", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractMethods(t *testing.T) {
|
||||
parser := NewParser()
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(testStructPageHTML))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse HTML: %v", err)
|
||||
}
|
||||
|
||||
methods := parser.ExtractMethods(doc)
|
||||
if len(methods) == 0 {
|
||||
t.Error("Expected at least one method")
|
||||
return
|
||||
}
|
||||
|
||||
if methods[0].Name == "" {
|
||||
t.Error("Expected non-empty method name")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,182 @@
|
||||
package rustdocs
|
||||
|
||||
import "time"
|
||||
|
||||
type Crate struct {
|
||||
Name string `json:"name"`
|
||||
Version string `json:"version"`
|
||||
Description string `json:"description"`
|
||||
Modules []*Module `json:"modules,omitempty"`
|
||||
Structs []*Struct `json:"structs,omitempty"`
|
||||
Enums []*Enum `json:"enums,omitempty"`
|
||||
Traits []*Trait `json:"traits,omitempty"`
|
||||
Functions []*Func `json:"functions,omitempty"`
|
||||
Macros []*Macro `json:"macros,omitempty"`
|
||||
Constants []*Const `json:"constants,omitempty"`
|
||||
Statics []*Static `json:"statics,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
Repository string `json:"repository,omitempty"`
|
||||
License string `json:"license,omitempty"`
|
||||
FetchedAt time.Time `json:"fetched_at"`
|
||||
}
|
||||
|
||||
// Module describes one module entry listed on a crate or module page.
type Module struct {
	Name string `json:"name"`
	Path string `json:"path"`
	// Doc is the short description shown next to the entry.
	Doc string `json:"doc,omitempty"`
	// DocURL is the link to the module's own page.
	DocURL    string `json:"doc_url"`
	Stability string `json:"stability,omitempty"`
	// IsExperimental reports whether the entry carries an unstable or
	// experimental stability marker.
	IsExperimental bool `json:"is_experimental"`
}
|
||||
|
||||
type Struct struct {
|
||||
Name string `json:"name"`
|
||||
Path string `json:"path"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Declaration string `json:"declaration"`
|
||||
Fields []*Field `json:"fields,omitempty"`
|
||||
Methods []*Method `json:"methods,omitempty"`
|
||||
TraitImpls []string `json:"trait_impls,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
Stability string `json:"stability,omitempty"`
|
||||
IsExperimental bool `json:"is_experimental"`
|
||||
}
|
||||
|
||||
type Enum struct {
|
||||
Name string `json:"name"`
|
||||
Path string `json:"path"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Declaration string `json:"declaration"`
|
||||
Variants []*Variant `json:"variants,omitempty"`
|
||||
Methods []*Method `json:"methods,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
Stability string `json:"stability,omitempty"`
|
||||
IsExperimental bool `json:"is_experimental"`
|
||||
}
|
||||
|
||||
type Trait struct {
|
||||
Name string `json:"name"`
|
||||
Path string `json:"path"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Declaration string `json:"declaration"`
|
||||
Methods []*Method `json:"methods,omitempty"`
|
||||
AssociatedTypes []string `json:"associated_types,omitempty"`
|
||||
AssociatedConsts []string `json:"associated_consts,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
Stability string `json:"stability,omitempty"`
|
||||
IsExperimental bool `json:"is_experimental"`
|
||||
}
|
||||
|
||||
// Func describes a free function entry.
type Func struct {
	Name string `json:"name"`
	Path string `json:"path"`
	// Doc is the short description shown next to the entry.
	Doc       string `json:"doc,omitempty"`
	Signature string `json:"signature"`
	// DocURL is the link to the function's own page.
	DocURL    string `json:"doc_url"`
	Stability string `json:"stability,omitempty"`
	// IsExperimental reports whether the entry carries an unstable or
	// experimental stability marker.
	IsExperimental bool `json:"is_experimental"`

	// Qualifier flags.
	IsUnsafe bool `json:"is_unsafe"`
	IsConst  bool `json:"is_const"`
	IsAsync  bool `json:"is_async"`
}
|
||||
|
||||
// Macro describes a macro entry.
type Macro struct {
	Name string `json:"name"`
	Path string `json:"path"`
	// Doc is the short description shown next to the entry.
	Doc       string `json:"doc,omitempty"`
	Signature string `json:"signature,omitempty"`
	// DocURL is the link to the macro's own page.
	DocURL    string `json:"doc_url"`
	Stability string `json:"stability,omitempty"`
	// IsExperimental reports whether the entry carries an unstable or
	// experimental stability marker.
	IsExperimental bool     `json:"is_experimental"`
	Examples       []string `json:"examples,omitempty"`
}
|
||||
|
||||
// Const describes a constant entry.
type Const struct {
	Name string `json:"name"`
	Path string `json:"path"`
	// Doc is the short description shown next to the entry.
	Doc   string `json:"doc,omitempty"`
	Type  string `json:"type,omitempty"`
	Value string `json:"value,omitempty"`
	// DocURL is the link to the constant's own page.
	DocURL    string `json:"doc_url"`
	Stability string `json:"stability,omitempty"`
	// IsExperimental reports whether the entry carries an unstable or
	// experimental stability marker.
	IsExperimental bool `json:"is_experimental"`
}
|
||||
|
||||
// Static describes a static item entry.
type Static struct {
	Name string `json:"name"`
	Path string `json:"path"`
	// Doc is the short description shown next to the entry.
	Doc       string `json:"doc,omitempty"`
	Type      string `json:"type,omitempty"`
	IsMutable bool   `json:"is_mutable"`
	// DocURL is the link to the static's own page.
	DocURL    string `json:"doc_url"`
	Stability string `json:"stability,omitempty"`
	// IsExperimental reports whether the entry carries an unstable or
	// experimental stability marker.
	IsExperimental bool `json:"is_experimental"`
}
|
||||
|
||||
// Field describes a single struct field.
type Field struct {
	Name string `json:"name"`
	Type string `json:"type"`
	Doc  string `json:"doc,omitempty"`
	// IsPub reports whether the field was detected as public.
	IsPub      bool   `json:"is_pub"`
	Visibility string `json:"visibility,omitempty"`
}
|
||||
|
||||
// Variant describes a single enum variant and its shape.
type Variant struct {
	Name   string   `json:"name"`
	Doc    string   `json:"doc,omitempty"`
	Fields []string `json:"fields,omitempty"`

	// Shape flags: tuple-like `V(..)`, struct-like `V { .. }`, or unit `V`.
	IsTuple  bool `json:"is_tuple"`
	IsStruct bool `json:"is_struct"`
	IsUnit   bool `json:"is_unit"`
}
|
||||
|
||||
// Method describes a method found in an impl block on an item page.
type Method struct {
	Name      string `json:"name"`
	Doc       string `json:"doc,omitempty"`
	Signature string `json:"signature"`
	Receiver  string `json:"receiver,omitempty"`

	// Qualifier flags.
	IsUnsafe  bool `json:"is_unsafe"`
	IsConst   bool `json:"is_const"`
	IsAsync   bool `json:"is_async"`
	IsDefault bool `json:"is_default"`

	Examples  []string `json:"examples,omitempty"`
	Stability string   `json:"stability,omitempty"`
	// IsExperimental reports whether the method carries an unstable or
	// experimental stability marker.
	IsExperimental bool `json:"is_experimental"`
}
|
||||
|
||||
// SearchResult is one entry parsed from a search-results page.
type SearchResult struct {
	Name string `json:"name"`
	Path string `json:"path"`
	// Kind is the item kind as a plain string (e.g. "struct", "fn").
	Kind        string `json:"kind"`
	Description string `json:"description,omitempty"`
	// DocURL is the link the result points at.
	DocURL    string `json:"doc_url"`
	Stability string `json:"stability,omitempty"`
	// IsExperimental reports whether the result carries an unstable or
	// experimental stability marker.
	IsExperimental bool `json:"is_experimental"`
}
|
||||
|
||||
// ItemKind names the kind of a documented item. Its string value is the
// short lowercase keyword for the kind.
type ItemKind string

// The item kinds known to this package.
const (
	ItemKindStruct    ItemKind = "struct"
	ItemKindEnum      ItemKind = "enum"
	ItemKindTrait     ItemKind = "trait"
	ItemKindFn        ItemKind = "fn"
	ItemKindMacro     ItemKind = "macro"
	ItemKindConst     ItemKind = "const"
	ItemKindStatic    ItemKind = "static"
	ItemKindMod       ItemKind = "mod"
	ItemKindType      ItemKind = "type"
	ItemKindUnion     ItemKind = "union"
	ItemKindPrimitive ItemKind = "primitive"
	ItemKindKeyword   ItemKind = "keyword"
	ItemKindAttr      ItemKind = "attr"
)
|
||||
|
||||
type Symbol struct {
|
||||
Name string `json:"name"`
|
||||
Kind ItemKind `json:"kind"`
|
||||
Path string `json:"path"`
|
||||
Signature string `json:"signature,omitempty"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
}
|
||||
Reference in New Issue
Block a user