first commit

This commit is contained in:
Tomas Dvorak
2026-02-22 10:42:17 +01:00
commit 55885a0e8f
239 changed files with 103690 additions and 0 deletions
+633
View File
@@ -0,0 +1,633 @@
package rustdocs
import (
"net/url"
"regexp"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// Parser extracts structured data from rustdoc-generated HTML pages.
type Parser struct {
// baseURL is the origin that relative links found in a page are resolved against.
baseURL string
}
// NewParser returns a Parser that resolves relative links against
// https://docs.rs.
func NewParser() *Parser {
	parser := new(Parser)
	parser.baseURL = "https://docs.rs"
	return parser
}
// ParseCratePage parses the HTML of a crate index page and returns the crate
// metadata together with the items listed on the page.
//
// docURL is stored verbatim in the result; FetchedAt is set to the time of the
// call. The only error returned is the one reported by the HTML parser.
func (p *Parser) ParseCratePage(html string, docURL string) (*Crate, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, err
	}

	return &Crate{
		DocURL:      docURL,
		FetchedAt:   time.Now(),
		Name:        p.extractCrateName(doc),
		Version:     p.extractVersion(doc),
		Description: p.extractDescription(doc),
		Repository:  p.extractRepository(doc),
		Modules:     p.extractModules(doc),
		Structs:     p.extractStructs(doc),
		Enums:       p.extractEnums(doc),
		Traits:      p.extractTraits(doc),
		Functions:   p.extractFunctions(doc),
		Macros:      p.extractMacros(doc),
		Constants:   p.extractConstants(doc),
		Statics:     p.extractStatics(doc),
	}, nil
}
// ParseItemPage parses the HTML of a single item page (struct, enum, fn, ...)
// and returns its name, path, kind, signature and top-level documentation.
//
// docURL is stored verbatim and is also used as a fallback source for the path.
// The only error returned is the one reported by the HTML parser.
func (p *Parser) ParseItemPage(html string, docURL string) (*Symbol, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, err
	}

	return &Symbol{
		DocURL:    docURL,
		Name:      p.extractItemName(doc),
		Path:      p.extractItemPath(doc, docURL),
		Kind:      p.extractItemKind(doc),
		Signature: p.extractItemSignature(doc),
		Doc:       p.extractItemDoc(doc),
	}, nil
}
// ParseSearchResults parses a rustdoc search-results fragment and returns one
// SearchResult per anchor found under "#results .search-results".
//
// For every result it extracts the kind (from the anchor's "result-*" class),
// the name, the "::"-separated path, the description, an absolute DocURL and
// any stability badge. The only error returned is the one reported by the HTML
// parser.
func (p *Parser) ParseSearchResults(html string) ([]*SearchResult, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, err
	}

	// Keep working for a zero-value Parser, which has no base URL configured.
	base := p.baseURL
	if base == "" {
		base = "https://docs.rs"
	}
	baseURL, baseErr := url.Parse(base)

	var results []*SearchResult
	doc.Find("#results .search-results a").Each(func(_ int, s *goquery.Selection) {
		result := &SearchResult{}

		// A missing class attribute simply yields an empty kind.
		classes, _ := s.Attr("class")
		result.Kind = extractKindFromClasses(classes)

		nameEl := s.Find(".result-name")
		result.Name = strings.TrimSpace(nameEl.Find(".method, .struct, .fn, .trait, .enum, .mod, .macro, .const, .static, .attr").Text())
		if result.Name == "" {
			// Trim before taking the first line: the element text usually
			// starts with a newline, which would otherwise give an empty name.
			nameText := strings.TrimSpace(nameEl.Text())
			result.Name = strings.TrimSpace(strings.Split(nameText, "\n")[0])
		}

		var pathParts []string
		nameEl.Find(".path span").Each(func(_ int, span *goquery.Selection) {
			// Segments may already carry their "::" separator; strip it so the
			// join below does not produce "std::::simd".
			part := strings.TrimSpace(span.Text())
			part = strings.TrimSuffix(strings.TrimPrefix(part, "::"), "::")
			part = strings.TrimSpace(part)
			if part != "" {
				pathParts = append(pathParts, part)
			}
		})
		result.Path = strings.Join(pathParts, "::")

		result.Description = strings.TrimSpace(s.Find(".desc").Text())

		if href, exists := s.Attr("href"); exists {
			// Default to the raw href; it is replaced below when it can be
			// resolved against the base URL.
			result.DocURL = href
			absolute := strings.HasPrefix(href, "http://") || strings.HasPrefix(href, "https://")
			if !absolute && baseErr == nil {
				// Resolve as a URL reference so that "../" segments are
				// collapsed and "#fragment"/"?query" parts are preserved
				// instead of being percent-escaped into the path.
				if ref, refErr := url.Parse(href); refErr == nil {
					result.DocURL = baseURL.ResolveReference(ref).String()
				}
			}
		}

		stabilityEl := s.Find(".stab")
		if stabilityEl.Length() > 0 {
			if stabilityEl.HasClass("unstable") || stabilityEl.HasClass("experimental") {
				result.IsExperimental = true
			}
			result.Stability = strings.TrimSpace(stabilityEl.Text())
		}

		results = append(results, result)
	})

	return results, nil
}
// extractCrateName returns the crate name taken from the page heading.
//
// The ".main-heading h1" text is used with a leading "Crate " or "Module "
// removed. If neither prefix is present, the first <h1> of the document is
// tried for a "Crate " prefix. Failing that, the trimmed main heading text is
// returned unchanged.
func (p *Parser) extractCrateName(doc *goquery.Document) string {
	heading := strings.TrimSpace(doc.Find(".main-heading h1").Text())
	for _, kind := range []string{"Crate ", "Module "} {
		if name := strings.TrimPrefix(heading, kind); name != heading {
			return name
		}
	}

	firstH1 := strings.TrimSpace(doc.Find("h1").First().Text())
	if name := strings.TrimPrefix(firstH1, "Crate "); name != firstH1 {
		return name
	}

	return heading
}
// Version patterns used by extractVersion, compiled once at package
// initialisation instead of on every call.
var (
	sinceVersionRe   = regexp.MustCompile(`\d+\.\d+\.\d+`)
	headingVersionRe = regexp.MustCompile(`v?(\d+\.\d+\.\d+)`)
)

// extractVersion returns the first "major.minor.patch" version found on the
// page, or "" when there is none.
//
// Each ".since" element is examined on its own, in document order. Matching
// against the concatenated text of all of them would fuse adjacent versions
// ("1.0.0" + "1.2.0" reads as "1.0.01"). If no ".since" element carries a
// version, the ".sub-heading" text is searched, accepting an optional "v"
// prefix.
//
// NOTE(review): in the test fixture the ".since" badge is titled "Stable since
// Rust version ...", so this value may be a toolchain version rather than the
// crate's own version. Confirm against real pages.
func (p *Parser) extractVersion(doc *goquery.Document) string {
	var version string
	doc.Find(".since").EachWithBreak(func(_ int, s *goquery.Selection) bool {
		version = sinceVersionRe.FindString(s.Text())
		return version == "" // keep scanning until a version is found
	})
	if version != "" {
		return version
	}

	subHeading := doc.Find(".sub-heading").Text()
	if match := headingVersionRe.FindStringSubmatch(subHeading); len(match) > 1 {
		return match[1]
	}

	return ""
}
// extractDescription returns the trimmed text of the crate's top-level
// documentation block, or "" when the page has no ".docblock" at all.
//
// The first ".docblock" inside ".top-doc" is preferred; otherwise the first
// ".docblock" anywhere in the document is used.
func (p *Parser) extractDescription(doc *goquery.Document) string {
	for _, selector := range []string{".top-doc .docblock", ".docblock"} {
		if block := doc.Find(selector).First(); block.Length() > 0 {
			return strings.TrimSpace(block.Text())
		}
	}
	return ""
}
// githubRepoRe matches the "https://github.com/<owner>/<repo>" prefix of a
// URL. It is compiled once at package initialisation.
var githubRepoRe = regexp.MustCompile(`https://github\.com/[^/]+/[^/]+`)

// extractRepository returns the GitHub repository URL referenced by the page's
// "a.src" links, or "" when none of them points at github.com.
//
// Every "a.src" link is inspected in document order and the first one whose
// href contains a GitHub repository URL wins. Looking only at the first link
// would miss a repository link that follows a non-GitHub source link.
func (p *Parser) extractRepository(doc *goquery.Document) string {
	var repo string
	doc.Find("a.src").EachWithBreak(func(_ int, link *goquery.Selection) bool {
		href, _ := link.Attr("href") // a missing href is treated as empty
		repo = githubRepoRe.FindString(href)
		return repo == "" // stop at the first match
	})
	return repo
}
// extractItemName returns the item's name from the ".main-heading h1" text
// with the leading kind word ("Struct ", "Function ", ...) removed. When no
// known kind prefix is present the trimmed heading is returned as is.
//
// The prefix list includes the long forms "Function " and "Constant " next to
// "Fn " and "Const ", so that headings recognised by extractItemKind are also
// stripped here.
//
// NOTE(review): "Union ", "Type Alias ", "Primitive Type " and "Keyword " are
// included on the assumption that rustdoc uses these heading words. Confirm
// against real pages.
func (p *Parser) extractItemName(doc *goquery.Document) string {
	h1 := strings.TrimSpace(doc.Find(".main-heading h1").Text())

	// Longer prefixes come before shorter ones that share the same first
	// word, so "Type Alias " is tried before "Type ".
	prefixes := []string{
		"Struct ", "Enum ", "Trait ",
		"Function ", "Fn ",
		"Macro ",
		"Constant ", "Const ",
		"Static ", "Module ",
		"Type Alias ", "Type ",
		"Union ", "Primitive Type ", "Keyword ",
	}
	for _, prefix := range prefixes {
		if strings.HasPrefix(h1, prefix) {
			return strings.TrimSpace(strings.TrimPrefix(h1, prefix))
		}
	}

	return h1
}
// extractItemPath returns the "::"-separated path shown for an item.
//
// The text of ".rustdoc-breadcrumbs" is preferred. A Rust path contains no
// whitespace, so every whitespace run (newlines and indentation between the
// breadcrumb links) is removed from it. When the page has no breadcrumbs the
// path is derived from docURL: the leading "/", a trailing "/index.html" and a
// trailing ".html" are dropped and "/" becomes "::". An empty string is
// returned when neither source is usable.
//
// NOTE(review): the URL fallback keeps file-name prefixes such as "struct."
// (".../struct.Mask.html" gives "...::struct.Mask"). Confirm whether callers
// rely on that.
func (p *Parser) extractItemPath(doc *goquery.Document, docURL string) string {
	breadcrumbs := strings.Join(strings.Fields(doc.Find(".rustdoc-breadcrumbs").Text()), "")
	if breadcrumbs != "" {
		return breadcrumbs
	}

	if docURL == "" {
		return ""
	}
	u, err := url.Parse(docURL)
	if err != nil {
		return ""
	}

	path := strings.TrimPrefix(u.Path, "/")
	path = strings.TrimSuffix(path, "/index.html")
	path = strings.TrimSuffix(path, ".html")
	return strings.ReplaceAll(path, "/", "::")
}
// extractItemKind determines what kind of item a page documents.
//
// The class attribute of the first <span> inside ".main-heading h1" is checked
// first, by substring and in the order listed below. If that gives no answer,
// the leading word of the trimmed heading text is used. An empty ItemKind is
// returned when neither source identifies the kind.
func (p *Parser) extractItemKind(doc *goquery.Document) ItemKind {
	type kindRule struct {
		marker string
		kind   ItemKind
	}

	// Order matters: the first marker contained in the class wins.
	classRules := []kindRule{
		{"struct", ItemKindStruct},
		{"enum", ItemKindEnum},
		{"trait", ItemKindTrait},
		{"fn", ItemKindFn},
		{"macro", ItemKindMacro},
		{"const", ItemKindConst},
		{"static", ItemKindStatic},
		{"mod", ItemKindMod},
		{"type", ItemKindType},
		{"union", ItemKindUnion},
	}
	if span := doc.Find(".main-heading h1 span").First(); span.Length() > 0 {
		class, _ := span.Attr("class") // a missing class attribute matches nothing
		for _, rule := range classRules {
			if strings.Contains(class, rule.marker) {
				return rule.kind
			}
		}
	}

	// Trim first: the heading text may start with whitespace, which would
	// defeat every prefix check below.
	title := strings.TrimSpace(doc.Find(".main-heading h1").Text())
	titleRules := []kindRule{
		{"Struct ", ItemKindStruct},
		{"Enum ", ItemKindEnum},
		{"Trait ", ItemKindTrait},
		{"Fn ", ItemKindFn},
		{"Function ", ItemKindFn},
		{"Macro ", ItemKindMacro},
		{"Const ", ItemKindConst},
		{"Constant ", ItemKindConst},
		{"Static ", ItemKindStatic},
		{"Module ", ItemKindMod},
		{"Type ", ItemKindType},
		{"Union ", ItemKindUnion},
	}
	for _, rule := range titleRules {
		if strings.HasPrefix(title, rule.marker) {
			return rule.kind
		}
	}

	return ""
}
// extractItemSignature returns the item's declaration text, trimmed.
//
// The text of "pre.rust.item-decl" is preferred; when that is empty the first
// "pre.rust" block of the page is used instead.
func (p *Parser) extractItemSignature(doc *goquery.Document) string {
	if decl := strings.TrimSpace(doc.Find("pre.rust.item-decl").Text()); decl != "" {
		return decl
	}
	return strings.TrimSpace(doc.Find("pre.rust").First().Text())
}
// extractItemDoc returns the trimmed text of the item's main documentation
// block: the first ".docblock" under ".top-doc" or, failing that, the first
// ".docblock" on the page. It returns "" when the page has no ".docblock".
func (p *Parser) extractItemDoc(doc *goquery.Document) string {
	block := doc.Find(".top-doc .docblock").First()
	if block.Length() == 0 {
		block = doc.Find(".docblock").First()
	}
	if block.Length() == 0 {
		return ""
	}
	return strings.TrimSpace(block.Text())
}
// extractModules returns the modules listed on a crate or module page.
//
// Elements with class "mod" inside ".item-table" or ".module-item" are
// examined. Two layouts are handled:
//
//   - the match is a container holding the module link: name and link come
//     from the anchors inside it, description and stability from its
//     descendants;
//   - the match is the module link itself (<a class="mod" ...>): name and link
//     come from the anchor, description and stability are looked up in the
//     anchor's parent element. NOTE(review): this assumes the parent wraps a
//     single entry, as in <div class="module-item">. Confirm for other
//     layouts.
//
// An anchor nested inside another matched element is skipped, because the
// enclosing element already reports that module. Entries without a name are
// dropped.
func (p *Parser) extractModules(doc *goquery.Document) []*Module {
	var modules []*Module

	matched := doc.Find(".item-table .mod, .module-item .mod")
	matched.Each(func(_ int, s *goquery.Selection) {
		// row is where description and stability are searched; link is the
		// anchor that supplies the href.
		row, link := s, s.Find("a").First()
		name := strings.TrimSpace(s.Find("a.mod").Text())

		if s.Is("a") {
			if s.Parents().FilterSelection(matched).Length() > 0 {
				return
			}
			// Descendant lookups on the anchor itself find nothing, so read
			// the anchor directly.
			row, link = s.Parent(), s
			name = strings.TrimSpace(s.Text())
		}

		if name == "" {
			name = strings.TrimSpace(link.Text())
		}
		if name == "" {
			return
		}

		mod := &Module{
			Name:           name,
			Doc:            strings.TrimSpace(row.Find(".desc, .item-desc").Text()),
			IsExperimental: row.Find(".stab.unstable, .stab.experimental").Length() > 0,
		}
		if href, exists := link.Attr("href"); exists {
			mod.DocURL = resolveURL(p.baseURL, href)
		}
		modules = append(modules, mod)
	})

	return modules
}
// extractStructs returns the structs listed on the page.
//
// Every element with class "struct" is examined. The name is the text of the
// "a.struct" links inside it, falling back to the first link. The URL is the
// first link's href resolved against the parser's base URL. Description and
// stability come from the element's descendants. Elements that yield no name
// are skipped.
//
// NOTE(review): the selector also matches any other ".struct" element on the
// page; those are only filtered out by the empty-name check.
func (p *Parser) extractStructs(doc *goquery.Document) []*Struct {
	var found []*Struct

	doc.Find(".item-table .struct, .struct").Each(func(_ int, node *goquery.Selection) {
		firstLink := node.Find("a").First()

		name := strings.TrimSpace(node.Find("a.struct").Text())
		if name == "" {
			name = strings.TrimSpace(firstLink.Text())
		}
		if name == "" {
			return
		}

		entry := &Struct{
			Name:           name,
			Doc:            strings.TrimSpace(node.Find(".desc, .item-desc").Text()),
			IsExperimental: node.Find(".stab.unstable, .stab.experimental").Length() > 0,
		}
		if href, ok := firstLink.Attr("href"); ok {
			entry.DocURL = resolveURL(p.baseURL, href)
		}
		found = append(found, entry)
	})

	return found
}
// extractEnums returns the enums listed on the page.
//
// Every element with class "enum" is examined. The name is the text of the
// "a.enum" links inside it, falling back to the first link. The URL is the
// first link's href resolved against the parser's base URL. Description and
// stability come from the element's descendants. Elements that yield no name
// are skipped.
func (p *Parser) extractEnums(doc *goquery.Document) []*Enum {
	var found []*Enum

	doc.Find(".item-table .enum, .enum").Each(func(_ int, node *goquery.Selection) {
		firstLink := node.Find("a").First()

		name := strings.TrimSpace(node.Find("a.enum").Text())
		if name == "" {
			name = strings.TrimSpace(firstLink.Text())
		}
		if name == "" {
			return
		}

		entry := &Enum{
			Name:           name,
			Doc:            strings.TrimSpace(node.Find(".desc, .item-desc").Text()),
			IsExperimental: node.Find(".stab.unstable, .stab.experimental").Length() > 0,
		}
		if href, ok := firstLink.Attr("href"); ok {
			entry.DocURL = resolveURL(p.baseURL, href)
		}
		found = append(found, entry)
	})

	return found
}
// extractTraits returns the traits listed on the page.
//
// Every element with class "trait" is examined. The name is the text of the
// "a.trait" links inside it, falling back to the first link. The URL is the
// first link's href resolved against the parser's base URL. Description and
// stability come from the element's descendants. Elements that yield no name
// are skipped.
func (p *Parser) extractTraits(doc *goquery.Document) []*Trait {
	var found []*Trait

	doc.Find(".item-table .trait, .trait").Each(func(_ int, node *goquery.Selection) {
		firstLink := node.Find("a").First()

		name := strings.TrimSpace(node.Find("a.trait").Text())
		if name == "" {
			name = strings.TrimSpace(firstLink.Text())
		}
		if name == "" {
			return
		}

		entry := &Trait{
			Name:           name,
			Doc:            strings.TrimSpace(node.Find(".desc, .item-desc").Text()),
			IsExperimental: node.Find(".stab.unstable, .stab.experimental").Length() > 0,
		}
		if href, ok := firstLink.Attr("href"); ok {
			entry.DocURL = resolveURL(p.baseURL, href)
		}
		found = append(found, entry)
	})

	return found
}
// extractFunctions returns the free functions listed on the page.
//
// Every element with class "fn" or "function" is examined. The name is the
// text of the "a.fn" links inside it, falling back to the first link. The URL
// is the first link's href resolved against the parser's base URL.
// Description and stability come from the element's descendants. Elements
// that yield no name are skipped.
//
// NOTE(review): IsUnsafe is true whenever the element's text contains the
// substring "unsafe", so a description that merely mentions the word also
// sets it.
func (p *Parser) extractFunctions(doc *goquery.Document) []*Func {
	var found []*Func

	doc.Find(".item-table .fn, .fn, .function").Each(func(_ int, node *goquery.Selection) {
		firstLink := node.Find("a").First()

		name := strings.TrimSpace(node.Find("a.fn").Text())
		if name == "" {
			name = strings.TrimSpace(firstLink.Text())
		}
		if name == "" {
			return
		}

		entry := &Func{
			Name:           name,
			Doc:            strings.TrimSpace(node.Find(".desc, .item-desc").Text()),
			IsExperimental: node.Find(".stab.unstable, .stab.experimental").Length() > 0,
			IsUnsafe:       strings.Contains(node.Text(), "unsafe"),
		}
		if href, ok := firstLink.Attr("href"); ok {
			entry.DocURL = resolveURL(p.baseURL, href)
		}
		found = append(found, entry)
	})

	return found
}
// extractMacros returns the macros listed on the page.
//
// Every element with class "macro" is examined. The name is the text of the
// "a.macro" links inside it, falling back to the first link. The URL is the
// first link's href resolved against the parser's base URL. Description and
// stability come from the element's descendants. Elements that yield no name
// are skipped.
func (p *Parser) extractMacros(doc *goquery.Document) []*Macro {
	var found []*Macro

	doc.Find(".item-table .macro, .macro").Each(func(_ int, node *goquery.Selection) {
		firstLink := node.Find("a").First()

		name := strings.TrimSpace(node.Find("a.macro").Text())
		if name == "" {
			name = strings.TrimSpace(firstLink.Text())
		}
		if name == "" {
			return
		}

		entry := &Macro{
			Name:           name,
			Doc:            strings.TrimSpace(node.Find(".desc, .item-desc").Text()),
			IsExperimental: node.Find(".stab.unstable, .stab.experimental").Length() > 0,
		}
		if href, ok := firstLink.Attr("href"); ok {
			entry.DocURL = resolveURL(p.baseURL, href)
		}
		found = append(found, entry)
	})

	return found
}
// extractConstants returns the constants listed on the page.
//
// Every element with class "constant" is examined. The name is the text of
// the "a.constant" links inside it, falling back to the first link. The URL
// is the first link's href resolved against the parser's base URL.
// Description and stability come from the element's descendants. Elements
// that yield no name are skipped.
func (p *Parser) extractConstants(doc *goquery.Document) []*Const {
	var found []*Const

	doc.Find(".item-table .constant, .constant").Each(func(_ int, node *goquery.Selection) {
		firstLink := node.Find("a").First()

		name := strings.TrimSpace(node.Find("a.constant").Text())
		if name == "" {
			name = strings.TrimSpace(firstLink.Text())
		}
		if name == "" {
			return
		}

		entry := &Const{
			Name:           name,
			Doc:            strings.TrimSpace(node.Find(".desc, .item-desc").Text()),
			IsExperimental: node.Find(".stab.unstable, .stab.experimental").Length() > 0,
		}
		if href, ok := firstLink.Attr("href"); ok {
			entry.DocURL = resolveURL(p.baseURL, href)
		}
		found = append(found, entry)
	})

	return found
}
// extractStatics returns the statics listed on the page.
//
// Every element with class "static" is examined. The name is the text of the
// "a.static" links inside it, falling back to the first link. The URL is the
// first link's href resolved against the parser's base URL. Description and
// stability come from the element's descendants. Elements that yield no name
// are skipped.
func (p *Parser) extractStatics(doc *goquery.Document) []*Static {
	var found []*Static

	doc.Find(".item-table .static, .static").Each(func(_ int, node *goquery.Selection) {
		firstLink := node.Find("a").First()

		name := strings.TrimSpace(node.Find("a.static").Text())
		if name == "" {
			name = strings.TrimSpace(firstLink.Text())
		}
		if name == "" {
			return
		}

		entry := &Static{
			Name:           name,
			Doc:            strings.TrimSpace(node.Find(".desc, .item-desc").Text()),
			IsExperimental: node.Find(".stab.unstable, .stab.experimental").Length() > 0,
		}
		if href, ok := firstLink.Attr("href"); ok {
			entry.DocURL = resolveURL(p.baseURL, href)
		}
		found = append(found, entry)
	})

	return found
}
// ExtractMethods returns the methods documented on an item page, one per
// ".method-toggle" element.
//
// For each method:
//   - Name is the text of the first ".fn" element, looked up in the
//     ".code-header" first and then anywhere in the toggle. The first
//     ".method" element and finally the "h4.code-header" text are fallbacks.
//     A single element is used each time, because joining the text of every
//     match repeats the signature when the matches are nested.
//   - Signature is the ".code-header" text, or the first <pre> when there is
//     no header. Code samples inside the documentation are not included.
//   - IsUnsafe, IsAsync and IsConst are set from the whole-word qualifiers
//     that precede the "fn" keyword, so "const" in a parameter type or a
//     const generic does not mark the method as const.
//   - Doc is the text of the ".docblock" elements; IsExperimental reports an
//     unstable or experimental stability badge.
//
// Toggles that yield no name are skipped.
func (p *Parser) ExtractMethods(doc *goquery.Document) []*Method {
	var methods []*Method

	doc.Find(".impl-items .method-toggle, details.method-toggle").Each(func(_ int, s *goquery.Selection) {
		header := s.Find(".code-header").First()

		name := strings.TrimSpace(header.Find(".fn").First().Text())
		if name == "" {
			name = strings.TrimSpace(s.Find(".fn").First().Text())
		}
		if name == "" {
			name = strings.TrimSpace(s.Find(".method").First().Text())
		}
		if name == "" {
			name = strings.TrimSpace(s.Find("h4.code-header").First().Text())
		}
		if name == "" {
			return
		}

		sig := strings.TrimSpace(header.Text())
		if sig == "" {
			sig = strings.TrimSpace(s.Find("pre").First().Text())
		}

		m := &Method{
			Name:           name,
			Signature:      sig,
			Doc:            strings.TrimSpace(s.Find(".docblock").Text()),
			IsExperimental: s.Find(".stab.unstable, .stab.experimental").Length() > 0,
		}

		// Qualifiers appear before the "fn" keyword; stop there so parameter
		// and return types are not inspected.
		for _, word := range strings.Fields(sig) {
			if word == "fn" {
				break
			}
			switch word {
			case "unsafe":
				m.IsUnsafe = true
			case "async":
				m.IsAsync = true
			case "const":
				m.IsConst = true
			}
		}

		methods = append(methods, m)
	})

	return methods
}
// ExtractStructFields returns the fields documented on a struct page.
//
// Rows matching ".struct .fields tr" or ".struct-member" are examined. Name,
// type and documentation are read from the ".structfield"/first cell,
// ".type"/second cell and ".docblock"/last cell respectively. Rows that yield
// no name are skipped.
//
// NOTE(review): IsPub is true whenever the row text contains the substring
// "pub", including inside documentation prose.
func (p *Parser) ExtractStructFields(doc *goquery.Document) []*Field {
	var out []*Field

	doc.Find(".struct .fields tr, .struct-member").Each(func(_ int, row *goquery.Selection) {
		name := strings.TrimSpace(row.Find(".structfield, td:first-child").Text())
		if name == "" {
			return
		}

		out = append(out, &Field{
			Name:  name,
			Type:  strings.TrimSpace(row.Find(".type, td:nth-child(2)").Text()),
			Doc:   strings.TrimSpace(row.Find(".docblock, td:last-child").Text()),
			IsPub: strings.Contains(row.Text(), "pub"),
		})
	})

	return out
}
// ExtractEnumVariants returns the variants documented on an enum page.
//
// Elements matching ".enum .variants li" or ".variant" are examined. The
// variant's name and shape are read from a copy of the element with its
// ".docblock" descendants removed. Without that step, a parenthesis or brace
// in the documentation prose would be mistaken for tuple or struct syntax.
//
// Shape flags: IsStruct when the declaration contains "{", IsTuple when it
// contains "(" but no "{", IsUnit otherwise. Variants that yield no name are
// skipped.
func (p *Parser) ExtractEnumVariants(doc *goquery.Document) []*Variant {
	var variants []*Variant

	doc.Find(".enum .variants li, .variant").Each(func(_ int, s *goquery.Selection) {
		// Clone before removing so the parsed document itself is untouched.
		decl := s.Clone()
		decl.Find(".docblock").Remove()
		declText := decl.Text()

		name := strings.TrimSpace(decl.Find("a, .variant-name").Text())
		if name == "" {
			name = strings.TrimSpace(declText)
		}
		if name == "" {
			return
		}

		v := &Variant{
			Name: name,
			Doc:  strings.TrimSpace(s.Find(".docblock").Text()),
		}
		v.IsStruct = strings.Contains(declText, "{")
		v.IsTuple = !v.IsStruct && strings.Contains(declText, "(")
		v.IsUnit = !v.IsTuple && !v.IsStruct

		variants = append(variants, v)
	})

	return variants
}
// extractKindFromClasses maps a search result's class attribute to an item
// kind.
//
// The whitespace-separated classes are scanned in order. The first
// "result-<kind>" class with a recognised kind decides the outcome: "method"
// is reported as "fn", "externcrate" as "mod", and the other known kinds are
// returned unchanged. Unrecognised "result-*" classes are ignored. "" is
// returned when nothing matches.
func extractKindFromClasses(classes string) string {
	for _, class := range strings.Fields(classes) {
		if !strings.HasPrefix(class, "result-") {
			continue
		}
		switch kind := strings.TrimPrefix(class, "result-"); kind {
		case "method":
			return "fn"
		case "externcrate":
			return "mod"
		case "struct", "enum", "trait", "fn", "macro", "const", "static", "mod", "type", "primitive", "keyword", "attr":
			return kind
		}
	}
	return ""
}
// resolveURL turns href into an absolute URL by resolving it against base.
//
// An href that already starts with "http://" or "https://" is returned
// verbatim. The check requires the full scheme separator, so a relative link
// whose first segment merely begins with "http" (for example
// "http/index.html") is still resolved. If either base or href cannot be
// parsed, href is returned unchanged.
func resolveURL(base string, href string) string {
	if strings.HasPrefix(href, "http://") || strings.HasPrefix(href, "https://") {
		return href
	}

	baseURL, err := url.Parse(base)
	if err != nil {
		return href
	}
	ref, err := url.Parse(href)
	if err != nil {
		return href
	}

	return baseURL.ResolveReference(ref).String()
}
// cleanTextSpaceRe matches a run of whitespace as defined by the regexp
// class \s. It is compiled once at package initialisation instead of on
// every cleanText call.
var cleanTextSpaceRe = regexp.MustCompile(`\s+`)

// cleanText collapses every run of whitespace in text into a single space and
// removes leading and trailing whitespace.
func cleanText(text string) string {
	return strings.TrimSpace(cleanTextSpaceRe.ReplaceAllString(text, " "))
}
+265
View File
@@ -0,0 +1,265 @@
package rustdocs
import (
"strings"
"testing"
"github.com/PuerkitoBio/goquery"
)
// testStructPageHTML is a minimal struct item page: heading with breadcrumbs,
// declaration, top-level documentation and one method inside an
// implementations section.
const testStructPageHTML = `
<!DOCTYPE html>
<html>
<body>
<main>
<div class="main-heading">
<div class="rustdoc-breadcrumbs"><a href="../index.html">std</a>::<wbr><a href="index.html">simd</a></div>
<h1>Struct <span class="struct">Mask</span></h1>
<span class="sub-heading"><span class="since" title="Stable since Rust version 1.0.0">1.0.0</span></span>
</div>
<pre class="rust item-decl"><code>pub struct Mask&lt;T, const N: usize&gt;(<span class="comment">/* private fields */</span>)</code></pre>
<details class="toggle top-doc" open="">
<div class="docblock"><p>A SIMD vector mask for N elements.</p></div>
</details>
<h2 id="implementations">Implementations</h2>
<details class="toggle implementors-toggle" open="">
<details class="toggle method-toggle" open="">
<section id="method.test" class="method">
<h4 class="code-header">pub fn <a href="#method.test" class="fn">test</a>(&amp;self, index: usize) -&gt; bool</h4>
</section>
<div class="docblock"><p>Tests the value of the specified element.</p></div>
</details>
</details>
</main>
</body>
</html>
`
// testSearchHTML is a search-results fragment with three results, in order:
// a method, a struct and a function.
const testSearchHTML = `
<div id="results">
<ul class="search-results active">
<a class="result-method" href="../std/simd/struct.Mask.html#method.test">
<span class="result-name">
<span class="typename">method</span>
<div class="path"><span>std::</span><span>simd::</span><span class="method">Mask::</span><span class="fn">test</span></div>
</span>
<div class="desc">Tests the value of the specified element.</div>
</a>
<a class="result-struct" href="../std/vec/struct.Vec.html">
<span class="result-name">
<span class="typename">struct</span>
<div class="path"><span>std::</span><span>vec::</span><span class="struct">Vec</span></div>
</span>
<div class="desc">A contiguous growable array type.</div>
</a>
<a class="result-fn" href="../std/io/fn.stdout.html">
<span class="result-name">
<span class="typename">fn</span>
<div class="path"><span>std::</span><span>io::</span><span class="fn">stdout</span></div>
</span>
<div class="desc">Constructs a new handle to the standard output.</div>
</a>
</ul>
</div>
`
// testCratePageHTML is a minimal crate index page for "serde" listing one
// module, one struct and one enum.
const testCratePageHTML = `
<!DOCTYPE html>
<html>
<body>
<main>
<div class="main-heading">
<h1>Crate <span>serde</span></h1>
<span class="sub-heading"><span class="since">1.0.0</span></span>
</div>
<details class="toggle top-doc">
<div class="docblock"><p>A framework for serializing and deserializing Rust data structures.</p></div>
</details>
<h2 id="modules">Modules</h2>
<div class="item-table">
<div class="module-item"><a class="mod" href="de/index.html">de</a><div class="desc">Deserialize implementation.</div></div>
</div>
<h2 id="structs">Structs</h2>
<div class="item-table">
<div class="struct"><a class="struct" href="struct.Serializer.html">Serializer</a><div class="desc">A structure for serializing Rust values.</div></div>
</div>
<h2 id="enums">Enums</h2>
<div class="item-table">
<div class="enum"><a class="enum" href="enum.Error.html">Error</a><div class="desc">Errors during serialization.</div></div>
</div>
</main>
</body>
</html>
`
// TestParseItemPage checks that the struct fixture yields the expected name,
// kind, documentation and signature.
func TestParseItemPage(t *testing.T) {
	const pageURL = "https://docs.rs/std/simd/struct.Mask.html"

	sym, err := NewParser().ParseItemPage(testStructPageHTML, pageURL)
	if err != nil {
		t.Fatalf("ParseItemPage failed: %v", err)
	}

	if got := sym.Name; got != "Mask" {
		t.Errorf("Expected name 'Mask', got '%s'", got)
	}
	if got := sym.Kind; got != ItemKindStruct {
		t.Errorf("Expected kind 'struct', got '%s'", got)
	}
	if len(sym.Doc) == 0 {
		t.Error("Expected non-empty doc")
	}
	if got := sym.Signature; !strings.Contains(got, "struct Mask") {
		t.Errorf("Expected signature to contain 'struct Mask', got '%s'", got)
	}
}
// TestParseSearchResults checks that the search fixture yields at least three
// results and that their kinds are mapped as expected (a method is reported
// as "fn").
func TestParseSearchResults(t *testing.T) {
	found, err := NewParser().ParseSearchResults(testSearchHTML)
	if err != nil {
		t.Fatalf("ParseSearchResults failed: %v", err)
	}
	if n := len(found); n < 3 {
		t.Fatalf("Expected at least 3 results, got %d", n)
	}

	first, second, third := found[0], found[1], found[2]

	if first.Kind != "fn" {
		t.Errorf("Expected kind 'fn' for method, got '%s'", first.Kind)
	}
	if first.Description == "" {
		t.Error("Expected non-empty description")
	}
	if second.Kind != "struct" {
		t.Errorf("Expected kind 'struct', got '%s'", second.Kind)
	}
	if third.Kind != "fn" {
		t.Errorf("Expected kind 'fn', got '%s'", third.Kind)
	}
}
// TestParseCratePage checks that the crate fixture yields a name and a
// description.
func TestParseCratePage(t *testing.T) {
	got, err := NewParser().ParseCratePage(testCratePageHTML, "https://docs.rs/serde")
	if err != nil {
		t.Fatalf("ParseCratePage failed: %v", err)
	}

	if len(got.Name) == 0 {
		t.Error("Expected non-empty name")
	}
	if len(got.Description) == 0 {
		t.Error("Expected non-empty description")
	}
}
// TestExtractKindFromClasses checks the class-to-kind mapping, including the
// "method" and "externcrate" aliases and an unrelated class.
func TestExtractKindFromClasses(t *testing.T) {
	cases := []struct {
		in, want string
	}{
		{in: "result-struct", want: "struct"},
		{in: "result-enum", want: "enum"},
		{in: "result-trait", want: "trait"},
		{in: "result-fn", want: "fn"},
		{in: "result-macro", want: "macro"},
		{in: "result-const", want: "const"},
		{in: "result-static", want: "static"},
		{in: "result-mod", want: "mod"},
		{in: "result-method", want: "fn"},
		{in: "result-externcrate", want: "mod"},
		{in: "unknown-class", want: ""},
	}

	for _, tc := range cases {
		t.Run(tc.in, func(t *testing.T) {
			if got := extractKindFromClasses(tc.in); got != tc.want {
				t.Errorf("extractKindFromClasses(%q) = %q, want %q", tc.in, got, tc.want)
			}
		})
	}
}
// TestResolveURL checks that a rooted path is resolved against the base and
// that an absolute URL is returned unchanged.
func TestResolveURL(t *testing.T) {
	cases := []struct {
		base, href, want string
	}{
		{base: "https://docs.rs", href: "/serde/struct.Serializer.html", want: "https://docs.rs/serde/struct.Serializer.html"},
		{base: "https://docs.rs", href: "https://example.com/page", want: "https://example.com/page"},
	}

	for _, tc := range cases {
		t.Run(tc.href, func(t *testing.T) {
			if got := resolveURL(tc.base, tc.href); got != tc.want {
				t.Errorf("resolveURL(%q, %q) = %q, want %q", tc.base, tc.href, got, tc.want)
			}
		})
	}
}
// TestCleanText checks that surrounding whitespace is removed from the
// inputs below.
func TestCleanText(t *testing.T) {
	cases := []struct {
		in, want string
	}{
		{in: " hello world ", want: "hello world"},
		{in: "single", want: "single"},
		{in: "\n\ttabs\t\n", want: "tabs"},
	}

	for _, tc := range cases {
		t.Run(tc.in, func(t *testing.T) {
			got := cleanText(tc.in)
			if got != tc.want {
				t.Errorf("cleanText(%q) = %q, want %q", tc.in, got, tc.want)
			}
		})
	}
}
func TestExtractItemPath(t *testing.T) {
parser := NewParser()
html := `<div class="rustdoc-breadcrumbs"><a href="../index.html">std</a>::<a href="index.html">simd</a></div>`
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
if err != nil {
t.Fatalf("Failed to parse HTML: %v", err)
}
got := parser.extractItemPath(doc, "https://docs.rs/std/simd/struct.Mask.html")
if !strings.Contains(got, "std") || !strings.Contains(got, "simd") {
t.Errorf("extractItemPath() = %q, expected to contain std and simd", got)
}
}
func TestExtractMethods(t *testing.T) {
parser := NewParser()
doc, err := goquery.NewDocumentFromReader(strings.NewReader(testStructPageHTML))
if err != nil {
t.Fatalf("Failed to parse HTML: %v", err)
}
methods := parser.ExtractMethods(doc)
if len(methods) == 0 {
t.Error("Expected at least one method")
return
}
if methods[0].Name == "" {
t.Error("Expected non-empty method name")
}
}
+182
View File
@@ -0,0 +1,182 @@
package rustdocs
import "time"
// Crate is the parsed form of a crate index page: its metadata plus the items
// listed on the page, grouped by kind. Empty item lists are omitted from the
// JSON encoding.
type Crate struct {
Name string `json:"name"`
Version string `json:"version"`
Description string `json:"description"`
Modules []*Module `json:"modules,omitempty"`
Structs []*Struct `json:"structs,omitempty"`
Enums []*Enum `json:"enums,omitempty"`
Traits []*Trait `json:"traits,omitempty"`
Functions []*Func `json:"functions,omitempty"`
Macros []*Macro `json:"macros,omitempty"`
Constants []*Const `json:"constants,omitempty"`
Statics []*Static `json:"statics,omitempty"`
// DocURL is the URL the page was parsed from, as supplied by the caller.
DocURL string `json:"doc_url"`
Repository string `json:"repository,omitempty"`
License string `json:"license,omitempty"`
// FetchedAt records when the page was parsed.
FetchedAt time.Time `json:"fetched_at"`
}
// Module describes a module entry found in a page's item listing.
type Module struct {
Name string `json:"name"`
Path string `json:"path"`
// Doc is the short description shown next to the entry.
Doc string `json:"doc,omitempty"`
// DocURL is the absolute URL of the module's own page.
DocURL string `json:"doc_url"`
Stability string `json:"stability,omitempty"`
// IsExperimental reports an unstable or experimental stability badge.
IsExperimental bool `json:"is_experimental"`
}
// Struct describes a struct: its listing entry plus, where available, its
// declaration, fields, methods and implemented traits.
type Struct struct {
Name string `json:"name"`
Path string `json:"path"`
// Doc is the short description shown next to the entry.
Doc string `json:"doc,omitempty"`
Declaration string `json:"declaration"`
Fields []*Field `json:"fields,omitempty"`
Methods []*Method `json:"methods,omitempty"`
TraitImpls []string `json:"trait_impls,omitempty"`
// DocURL is the absolute URL of the struct's own page.
DocURL string `json:"doc_url"`
Stability string `json:"stability,omitempty"`
// IsExperimental reports an unstable or experimental stability badge.
IsExperimental bool `json:"is_experimental"`
}
// Enum describes an enum: its listing entry plus, where available, its
// declaration, variants and methods.
type Enum struct {
Name string `json:"name"`
Path string `json:"path"`
// Doc is the short description shown next to the entry.
Doc string `json:"doc,omitempty"`
Declaration string `json:"declaration"`
Variants []*Variant `json:"variants,omitempty"`
Methods []*Method `json:"methods,omitempty"`
// DocURL is the absolute URL of the enum's own page.
DocURL string `json:"doc_url"`
Stability string `json:"stability,omitempty"`
// IsExperimental reports an unstable or experimental stability badge.
IsExperimental bool `json:"is_experimental"`
}
// Trait describes a trait: its listing entry plus, where available, its
// declaration, methods and associated items.
type Trait struct {
Name string `json:"name"`
Path string `json:"path"`
// Doc is the short description shown next to the entry.
Doc string `json:"doc,omitempty"`
Declaration string `json:"declaration"`
Methods []*Method `json:"methods,omitempty"`
AssociatedTypes []string `json:"associated_types,omitempty"`
AssociatedConsts []string `json:"associated_consts,omitempty"`
// DocURL is the absolute URL of the trait's own page.
DocURL string `json:"doc_url"`
Stability string `json:"stability,omitempty"`
// IsExperimental reports an unstable or experimental stability badge.
IsExperimental bool `json:"is_experimental"`
}
// Func describes a free function found in a page's item listing.
type Func struct {
Name string `json:"name"`
Path string `json:"path"`
// Doc is the short description shown next to the entry.
Doc string `json:"doc,omitempty"`
Signature string `json:"signature"`
// DocURL is the absolute URL of the function's own page.
DocURL string `json:"doc_url"`
Stability string `json:"stability,omitempty"`
// IsExperimental reports an unstable or experimental stability badge.
IsExperimental bool `json:"is_experimental"`
IsUnsafe bool `json:"is_unsafe"`
IsConst bool `json:"is_const"`
IsAsync bool `json:"is_async"`
}
// Macro describes a macro found in a page's item listing.
type Macro struct {
Name string `json:"name"`
Path string `json:"path"`
// Doc is the short description shown next to the entry.
Doc string `json:"doc,omitempty"`
Signature string `json:"signature,omitempty"`
// DocURL is the absolute URL of the macro's own page.
DocURL string `json:"doc_url"`
Stability string `json:"stability,omitempty"`
// IsExperimental reports an unstable or experimental stability badge.
IsExperimental bool `json:"is_experimental"`
Examples []string `json:"examples,omitempty"`
}
// Const describes a constant found in a page's item listing.
type Const struct {
Name string `json:"name"`
Path string `json:"path"`
// Doc is the short description shown next to the entry.
Doc string `json:"doc,omitempty"`
Type string `json:"type,omitempty"`
Value string `json:"value,omitempty"`
// DocURL is the absolute URL of the constant's own page.
DocURL string `json:"doc_url"`
Stability string `json:"stability,omitempty"`
// IsExperimental reports an unstable or experimental stability badge.
IsExperimental bool `json:"is_experimental"`
}
// Static describes a static item found in a page's item listing.
type Static struct {
Name string `json:"name"`
Path string `json:"path"`
// Doc is the short description shown next to the entry.
Doc string `json:"doc,omitempty"`
Type string `json:"type,omitempty"`
IsMutable bool `json:"is_mutable"`
// DocURL is the absolute URL of the static's own page.
DocURL string `json:"doc_url"`
Stability string `json:"stability,omitempty"`
// IsExperimental reports an unstable or experimental stability badge.
IsExperimental bool `json:"is_experimental"`
}
// Field describes one field of a struct as shown on the struct's page.
type Field struct {
Name string `json:"name"`
Type string `json:"type"`
Doc string `json:"doc,omitempty"`
IsPub bool `json:"is_pub"`
Visibility string `json:"visibility,omitempty"`
}
// Variant describes one variant of an enum as shown on the enum's page.
type Variant struct {
Name string `json:"name"`
Doc string `json:"doc,omitempty"`
Fields []string `json:"fields,omitempty"`
// Shape flags for the variant: tuple-like, struct-like or unit.
IsTuple bool `json:"is_tuple"`
IsStruct bool `json:"is_struct"`
IsUnit bool `json:"is_unit"`
}
// Method describes a method documented on an item page.
type Method struct {
Name string `json:"name"`
Doc string `json:"doc,omitempty"`
Signature string `json:"signature"`
Receiver string `json:"receiver,omitempty"`
IsUnsafe bool `json:"is_unsafe"`
IsConst bool `json:"is_const"`
IsAsync bool `json:"is_async"`
IsDefault bool `json:"is_default"`
Examples []string `json:"examples,omitempty"`
Stability string `json:"stability,omitempty"`
// IsExperimental reports an unstable or experimental stability badge.
IsExperimental bool `json:"is_experimental"`
}
// SearchResult is one entry of a parsed search-results page.
type SearchResult struct {
Name string `json:"name"`
// Path is the item's path with segments joined by "::".
Path string `json:"path"`
// Kind is the item kind derived from the result's "result-*" class.
Kind string `json:"kind"`
Description string `json:"description,omitempty"`
DocURL string `json:"doc_url"`
// Stability is the text of the result's stability badge, if it has one.
Stability string `json:"stability,omitempty"`
// IsExperimental reports an unstable or experimental stability badge.
IsExperimental bool `json:"is_experimental"`
}
// ItemKind names the kind of a documented item; it is the type of Symbol.Kind.
type ItemKind string
// Known ItemKind values. The string is what appears in the JSON encoding of
// Symbol.Kind.
const (
ItemKindStruct ItemKind = "struct"
ItemKindEnum ItemKind = "enum"
ItemKindTrait ItemKind = "trait"
ItemKindFn ItemKind = "fn"
ItemKindMacro ItemKind = "macro"
ItemKindConst ItemKind = "const"
ItemKindStatic ItemKind = "static"
ItemKindMod ItemKind = "mod"
ItemKindType ItemKind = "type"
ItemKindUnion ItemKind = "union"
ItemKindPrimitive ItemKind = "primitive"
ItemKindKeyword ItemKind = "keyword"
ItemKindAttr ItemKind = "attr"
)
// Symbol is the parsed form of a single item page, as produced by
// ParseItemPage.
type Symbol struct {
Name string `json:"name"`
Kind ItemKind `json:"kind"`
Path string `json:"path"`
Signature string `json:"signature,omitempty"`
Doc string `json:"doc,omitempty"`
// DocURL is the URL the page was parsed from, as supplied by the caller.
DocURL string `json:"doc_url"`
}