mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-04 04:23:02 +00:00
first commit
This commit is contained in:
@@ -0,0 +1,555 @@
|
||||
package pythondocs
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// Parser extracts structured data from Python documentation HTML pages.
type Parser struct {
	// baseURL is the site root against which relative links found in search
	// results are resolved.
	baseURL string
}
|
||||
|
||||
func NewParser() *Parser {
|
||||
return &Parser{
|
||||
baseURL: "https://docs.python.org",
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Parser) ParseModulePage(html string, docURL string) (*Module, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
module := &Module{
|
||||
DocURL: docURL,
|
||||
FetchedAt: time.Now(),
|
||||
}
|
||||
|
||||
module.Name = p.extractModuleName(doc)
|
||||
module.Path = module.Name
|
||||
module.Doc = p.extractModuleDoc(doc)
|
||||
module.Synopsis = p.extractSynopsis(doc)
|
||||
module.Version = p.extractVersion(doc)
|
||||
|
||||
module.Classes = p.extractClasses(doc, module.Name, docURL)
|
||||
module.Functions = p.extractFunctions(doc, module.Name, docURL)
|
||||
module.Exceptions = p.extractExceptions(doc, module.Name, docURL)
|
||||
module.Constants = p.extractData(doc, module.Name, docURL)
|
||||
|
||||
return module, nil
|
||||
}
|
||||
|
||||
func (p *Parser) ParseSearchResults(html string) ([]*SearchResult, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var results []*SearchResult
|
||||
|
||||
doc.Find("ul.search li").Each(func(i int, s *goquery.Selection) {
|
||||
result := &SearchResult{}
|
||||
|
||||
classes, _ := s.Attr("class")
|
||||
if strings.Contains(classes, "kind-object") {
|
||||
result.Kind = "object"
|
||||
} else if strings.Contains(classes, "kind-text") {
|
||||
result.Kind = "text"
|
||||
} else if strings.Contains(classes, "kind-title") {
|
||||
result.Kind = "title"
|
||||
}
|
||||
|
||||
link := s.Find("a").First()
|
||||
result.Name = strings.TrimSpace(link.Text())
|
||||
|
||||
if href, exists := link.Attr("href"); exists {
|
||||
result.DocURL = resolveURL(p.baseURL, href)
|
||||
result.Path = extractPathFromURL(href)
|
||||
}
|
||||
|
||||
if score, exists := link.Attr("data-score"); exists {
|
||||
var scoreInt int
|
||||
for _, c := range score {
|
||||
if c >= '0' && c <= '9' {
|
||||
scoreInt = scoreInt*10 + int(c-'0')
|
||||
}
|
||||
}
|
||||
result.Score = scoreInt
|
||||
}
|
||||
|
||||
span := s.Find("span").Last()
|
||||
result.Description = strings.TrimSpace(span.Text())
|
||||
|
||||
results = append(results, result)
|
||||
})
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (p *Parser) extractModuleName(doc *goquery.Document) string {
|
||||
section := doc.Find("section[id^='module-']").First()
|
||||
if section.Length() > 0 {
|
||||
id, _ := section.Attr("id")
|
||||
return strings.TrimPrefix(id, "module-")
|
||||
}
|
||||
|
||||
h1 := doc.Find("h1 code").First()
|
||||
if h1.Length() > 0 {
|
||||
return strings.TrimSpace(h1.Text())
|
||||
}
|
||||
|
||||
h1 = doc.Find(".body h1").First()
|
||||
if h1.Length() > 0 {
|
||||
text := h1.Text()
|
||||
if strings.HasPrefix(text, "—") {
|
||||
parts := strings.SplitN(text, "—", 2)
|
||||
if len(parts) > 0 {
|
||||
return strings.TrimSpace(parts[0])
|
||||
}
|
||||
}
|
||||
return strings.TrimSpace(text)
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (p *Parser) extractModuleDoc(doc *goquery.Document) string {
|
||||
section := doc.Find("section[id^='module-']").First()
|
||||
if section.Length() == 0 {
|
||||
section = doc.Find(".body").First()
|
||||
}
|
||||
|
||||
docblock := section.Find("p").First()
|
||||
if docblock.Length() > 0 {
|
||||
return strings.TrimSpace(docblock.Text())
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (p *Parser) extractSynopsis(doc *goquery.Document) string {
|
||||
text := doc.Find(".body p").First().Text()
|
||||
text = strings.TrimSpace(text)
|
||||
if len(text) > 200 {
|
||||
return text[:197] + "..."
|
||||
}
|
||||
return text
|
||||
}
|
||||
|
||||
func (p *Parser) extractVersion(doc *goquery.Document) string {
|
||||
versionAdded := doc.Find(".versionadded").Text()
|
||||
if versionAdded != "" {
|
||||
re := regexp.MustCompile(`\d+\.\d+`)
|
||||
if match := re.FindString(versionAdded); match != "" {
|
||||
return match
|
||||
}
|
||||
}
|
||||
|
||||
versionChanged := doc.Find(".versionchanged").Text()
|
||||
if versionChanged != "" {
|
||||
re := regexp.MustCompile(`\d+\.\d+`)
|
||||
if match := re.FindString(versionChanged); match != "" {
|
||||
return match
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (p *Parser) extractClasses(doc *goquery.Document, moduleName string, docURL string) []*Class {
|
||||
var classes []*Class
|
||||
|
||||
doc.Find("dl.py.class").Each(func(_ int, s *goquery.Selection) {
|
||||
class := &Class{
|
||||
Module: moduleName,
|
||||
}
|
||||
|
||||
dt := s.Find("dt.sig-object").First()
|
||||
if dt.Length() == 0 {
|
||||
dt = s.Find("dt").First()
|
||||
}
|
||||
|
||||
sig := dt.Find("code.sig-prename")
|
||||
class.Name = strings.TrimSpace(sig.Find(".pre").Last().Text())
|
||||
if class.Name == "" {
|
||||
class.Name = strings.TrimSpace(dt.Find(".sig-name").Text())
|
||||
}
|
||||
if class.Name == "" {
|
||||
sigText := dt.Text()
|
||||
sigText = strings.TrimSpace(sigText)
|
||||
parts := strings.Fields(sigText)
|
||||
if len(parts) > 0 {
|
||||
class.Name = parts[0]
|
||||
}
|
||||
}
|
||||
|
||||
if id, exists := dt.Attr("id"); exists {
|
||||
class.QualName = id
|
||||
class.DocURL = docURL + "#" + id
|
||||
} else {
|
||||
class.QualName = class.Name
|
||||
class.DocURL = docURL
|
||||
}
|
||||
|
||||
class.Signature = strings.TrimSpace(dt.Text())
|
||||
|
||||
dd := s.Find("dd").First()
|
||||
class.Doc = strings.TrimSpace(dd.Find("p").First().Text())
|
||||
|
||||
bases := dt.Find("a.reference.internal")
|
||||
bases.Each(func(_ int, b *goquery.Selection) {
|
||||
base := strings.TrimSpace(b.Text())
|
||||
if base != "" && base != class.Name {
|
||||
class.Bases = append(class.Bases, base)
|
||||
}
|
||||
})
|
||||
|
||||
class.Methods = p.extractMethods(s, class.Name, docURL)
|
||||
class.ClassMethods = p.extractClassMethods(s, class.Name, docURL)
|
||||
class.StaticMethods = p.extractStaticMethods(s, class.Name, docURL)
|
||||
class.Attributes = p.extractAttributes(s, class.Name, docURL)
|
||||
|
||||
if class.Name != "" {
|
||||
classes = append(classes, class)
|
||||
}
|
||||
})
|
||||
|
||||
return classes
|
||||
}
|
||||
|
||||
func (p *Parser) extractFunctions(doc *goquery.Document, moduleName string, docURL string) []*Function {
|
||||
var functions []*Function
|
||||
|
||||
doc.Find("dl.py.function").Each(func(_ int, s *goquery.Selection) {
|
||||
fn := &Function{
|
||||
Module: moduleName,
|
||||
}
|
||||
|
||||
dt := s.Find("dt.sig-object").First()
|
||||
if dt.Length() == 0 {
|
||||
dt = s.Find("dt").First()
|
||||
}
|
||||
|
||||
sig := dt.Find("code.sig-prename")
|
||||
fn.Name = strings.TrimSpace(sig.Find(".pre").Last().Text())
|
||||
if fn.Name == "" {
|
||||
fn.Name = strings.TrimSpace(dt.Find(".sig-name").Text())
|
||||
}
|
||||
if fn.Name == "" {
|
||||
sigText := dt.Text()
|
||||
sigText = strings.TrimSpace(sigText)
|
||||
if idx := strings.Index(sigText, "("); idx > 0 {
|
||||
fn.Name = strings.TrimSpace(sigText[:idx])
|
||||
}
|
||||
}
|
||||
|
||||
if id, exists := dt.Attr("id"); exists {
|
||||
fn.QualName = id
|
||||
fn.DocURL = docURL + "#" + id
|
||||
} else {
|
||||
fn.QualName = fn.Name
|
||||
fn.DocURL = docURL
|
||||
}
|
||||
|
||||
fn.Signature = strings.TrimSpace(dt.Text())
|
||||
|
||||
dd := s.Find("dd").First()
|
||||
fn.Doc = strings.TrimSpace(dd.Find("p").First().Text())
|
||||
|
||||
fn.Parameters = p.extractParameters(dt)
|
||||
|
||||
if class := s.Find("dl.py.method, dl.py.classmethod, dl.py.staticmethod"); class.Length() > 0 {
|
||||
return
|
||||
}
|
||||
|
||||
if fn.Name != "" {
|
||||
functions = append(functions, fn)
|
||||
}
|
||||
})
|
||||
|
||||
return functions
|
||||
}
|
||||
|
||||
func (p *Parser) extractExceptions(doc *goquery.Document, moduleName string, docURL string) []*Exception {
|
||||
var exceptions []*Exception
|
||||
|
||||
doc.Find("dl.py.exception").Each(func(_ int, s *goquery.Selection) {
|
||||
exc := &Exception{
|
||||
Module: moduleName,
|
||||
}
|
||||
|
||||
dt := s.Find("dt.sig-object").First()
|
||||
if dt.Length() == 0 {
|
||||
dt = s.Find("dt").First()
|
||||
}
|
||||
|
||||
sig := dt.Find("code.sig-prename")
|
||||
exc.Name = strings.TrimSpace(sig.Find(".pre").Last().Text())
|
||||
if exc.Name == "" {
|
||||
exc.Name = strings.TrimSpace(dt.Find(".sig-name").Text())
|
||||
}
|
||||
if exc.Name == "" {
|
||||
sigText := dt.Text()
|
||||
sigText = strings.TrimSpace(sigText)
|
||||
if idx := strings.Index(sigText, "("); idx > 0 {
|
||||
exc.Name = strings.TrimSpace(sigText[:idx])
|
||||
}
|
||||
}
|
||||
|
||||
if id, exists := dt.Attr("id"); exists {
|
||||
exc.QualName = id
|
||||
exc.DocURL = docURL + "#" + id
|
||||
} else {
|
||||
exc.QualName = exc.Name
|
||||
exc.DocURL = docURL
|
||||
}
|
||||
|
||||
exc.Signature = strings.TrimSpace(dt.Text())
|
||||
|
||||
dd := s.Find("dd").First()
|
||||
exc.Doc = strings.TrimSpace(dd.Find("p").First().Text())
|
||||
|
||||
if exc.Name != "" {
|
||||
exceptions = append(exceptions, exc)
|
||||
}
|
||||
})
|
||||
|
||||
return exceptions
|
||||
}
|
||||
|
||||
func (p *Parser) extractData(doc *goquery.Document, moduleName string, docURL string) []*Data {
|
||||
var dataList []*Data
|
||||
|
||||
doc.Find("dl.py.data").Each(func(_ int, s *goquery.Selection) {
|
||||
data := &Data{
|
||||
Module: moduleName,
|
||||
}
|
||||
|
||||
dt := s.Find("dt.sig-object").First()
|
||||
if dt.Length() == 0 {
|
||||
dt = s.Find("dt").First()
|
||||
}
|
||||
|
||||
sig := dt.Find("code.sig-prename")
|
||||
data.Name = strings.TrimSpace(sig.Find(".pre").Last().Text())
|
||||
if data.Name == "" {
|
||||
sigText := dt.Text()
|
||||
sigText = strings.TrimSpace(sigText)
|
||||
data.Name = strings.Fields(sigText)[0]
|
||||
}
|
||||
|
||||
if id, exists := dt.Attr("id"); exists {
|
||||
data.DocURL = docURL + "#" + id
|
||||
} else {
|
||||
data.DocURL = docURL
|
||||
}
|
||||
|
||||
dd := s.Find("dd").First()
|
||||
data.Doc = strings.TrimSpace(dd.Find("p").First().Text())
|
||||
|
||||
if data.Name != "" {
|
||||
dataList = append(dataList, data)
|
||||
}
|
||||
})
|
||||
|
||||
return dataList
|
||||
}
|
||||
|
||||
func (p *Parser) extractMethods(parent *goquery.Selection, className string, docURL string) []*Method {
|
||||
var methods []*Method
|
||||
|
||||
parent.Find("dl.py.method").Each(func(_ int, s *goquery.Selection) {
|
||||
method := p.parseMethod(s, className, docURL, false, false)
|
||||
if method != nil {
|
||||
methods = append(methods, method)
|
||||
}
|
||||
})
|
||||
|
||||
return methods
|
||||
}
|
||||
|
||||
func (p *Parser) extractClassMethods(parent *goquery.Selection, className string, docURL string) []*Method {
|
||||
var methods []*Method
|
||||
|
||||
parent.Find("dl.py.classmethod").Each(func(_ int, s *goquery.Selection) {
|
||||
method := p.parseMethod(s, className, docURL, true, false)
|
||||
if method != nil {
|
||||
methods = append(methods, method)
|
||||
}
|
||||
})
|
||||
|
||||
return methods
|
||||
}
|
||||
|
||||
func (p *Parser) extractStaticMethods(parent *goquery.Selection, className string, docURL string) []*Method {
|
||||
var methods []*Method
|
||||
|
||||
parent.Find("dl.py.staticmethod").Each(func(_ int, s *goquery.Selection) {
|
||||
method := p.parseMethod(s, className, docURL, false, true)
|
||||
if method != nil {
|
||||
methods = append(methods, method)
|
||||
}
|
||||
})
|
||||
|
||||
return methods
|
||||
}
|
||||
|
||||
func (p *Parser) parseMethod(s *goquery.Selection, className string, docURL string, isClassMethod bool, isStatic bool) *Method {
|
||||
method := &Method{
|
||||
Class: className,
|
||||
IsClassMethod: isClassMethod,
|
||||
IsStatic: isStatic,
|
||||
}
|
||||
|
||||
dt := s.Find("dt.sig-object").First()
|
||||
if dt.Length() == 0 {
|
||||
dt = s.Find("dt").First()
|
||||
}
|
||||
|
||||
sig := dt.Find("code.sig-prename")
|
||||
method.Name = strings.TrimSpace(sig.Find(".pre").Last().Text())
|
||||
if method.Name == "" {
|
||||
method.Name = strings.TrimSpace(dt.Find(".sig-name").Text())
|
||||
}
|
||||
if method.Name == "" {
|
||||
sigText := dt.Text()
|
||||
sigText = strings.TrimSpace(sigText)
|
||||
if idx := strings.Index(sigText, "("); idx > 0 {
|
||||
name := strings.TrimSpace(sigText[:idx])
|
||||
parts := strings.Split(name, ".")
|
||||
method.Name = parts[len(parts)-1]
|
||||
}
|
||||
}
|
||||
|
||||
if id, exists := dt.Attr("id"); exists {
|
||||
method.QualName = id
|
||||
method.DocURL = docURL + "#" + id
|
||||
} else {
|
||||
method.QualName = className + "." + method.Name
|
||||
method.DocURL = docURL
|
||||
}
|
||||
|
||||
method.Signature = strings.TrimSpace(dt.Text())
|
||||
|
||||
dd := s.Find("dd").First()
|
||||
method.Doc = strings.TrimSpace(dd.Find("p").First().Text())
|
||||
|
||||
method.Parameters = p.extractParameters(dt)
|
||||
|
||||
if method.Name != "" {
|
||||
return method
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Parser) extractAttributes(parent *goquery.Selection, className string, docURL string) []*Attribute {
|
||||
var attributes []*Attribute
|
||||
|
||||
parent.Find("dl.py.attribute").Each(func(_ int, s *goquery.Selection) {
|
||||
attr := &Attribute{
|
||||
Class: className,
|
||||
}
|
||||
|
||||
dt := s.Find("dt.sig-object").First()
|
||||
if dt.Length() == 0 {
|
||||
dt = s.Find("dt").First()
|
||||
}
|
||||
|
||||
sig := dt.Find("code.sig-prename")
|
||||
attr.Name = strings.TrimSpace(sig.Find(".pre").Last().Text())
|
||||
if attr.Name == "" {
|
||||
sigText := dt.Text()
|
||||
sigText = strings.TrimSpace(sigText)
|
||||
attr.Name = strings.Fields(sigText)[0]
|
||||
}
|
||||
|
||||
if id, exists := dt.Attr("id"); exists {
|
||||
attr.DocURL = docURL + "#" + id
|
||||
} else {
|
||||
attr.DocURL = docURL
|
||||
}
|
||||
|
||||
dd := s.Find("dd").First()
|
||||
attr.Doc = strings.TrimSpace(dd.Find("p").First().Text())
|
||||
|
||||
if attr.Name != "" {
|
||||
attributes = append(attributes, attr)
|
||||
}
|
||||
})
|
||||
|
||||
return attributes
|
||||
}
|
||||
|
||||
func (p *Parser) extractParameters(dt *goquery.Selection) []*Param {
|
||||
var params []*Param
|
||||
|
||||
dt.Find("em.sig-param").Each(func(_ int, em *goquery.Selection) {
|
||||
param := &Param{}
|
||||
|
||||
text := strings.TrimSpace(em.Text())
|
||||
|
||||
if strings.HasPrefix(text, "*") && !strings.HasPrefix(text, "**") {
|
||||
param.IsVarArgs = true
|
||||
text = strings.TrimPrefix(text, "*")
|
||||
} else if strings.HasPrefix(text, "**") {
|
||||
param.IsKWArgs = true
|
||||
text = strings.TrimPrefix(text, "**")
|
||||
}
|
||||
|
||||
if strings.Contains(text, "=") {
|
||||
parts := strings.SplitN(text, "=", 2)
|
||||
param.Name = strings.TrimSpace(parts[0])
|
||||
param.Default = strings.TrimSpace(parts[1])
|
||||
} else {
|
||||
param.Name = text
|
||||
}
|
||||
|
||||
if param.Name != "" {
|
||||
params = append(params, param)
|
||||
}
|
||||
})
|
||||
|
||||
return params
|
||||
}
|
||||
|
||||
// extractPathFromURL reduces a link target to a bare document path: the URL
// path without query string or fragment, without a trailing ".html" or "/",
// and without a leading "/".
//
// url.Parse already separates the query and fragment, so no further "#"
// handling is applied to the parsed path (an escaped "%23" is part of the
// path and is kept). When href cannot be parsed, the query and fragment are
// stripped textually and the same trimming is applied.
func extractPathFromURL(href string) string {
	path := href
	if u, err := url.Parse(href); err == nil {
		path = u.Path
	} else if idx := strings.IndexAny(href, "?#"); idx >= 0 {
		path = href[:idx]
	}

	path = strings.TrimSuffix(path, ".html")
	path = strings.TrimSuffix(path, "/")
	path = strings.TrimPrefix(path, "/")

	return path
}
|
||||
|
||||
// resolveURL turns href into an absolute URL using base as the reference.
//
// An href that already carries a scheme is returned unchanged. Whether it is
// absolute is decided by parsing it, not by a textual "http" prefix test, so
// relative links such as "http.client.html" are resolved correctly. If either
// href or base cannot be parsed, href is returned as-is.
func resolveURL(base string, href string) string {
	ref, err := url.Parse(href)
	if err != nil || ref.IsAbs() {
		return href
	}

	baseURL, err := url.Parse(base)
	if err != nil {
		return href
	}

	return baseURL.ResolveReference(ref).String()
}
|
||||
@@ -0,0 +1,281 @@
|
||||
package pythondocs
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// testModulePageHTML is a minimal module page containing one class, one
// function, one exception and one data item inside a "module-test" section.
const testModulePageHTML = `
<!DOCTYPE html>
<html>
<body>
<div class="body" role="main">
<section id="module-test">
<h1><code class="xref py py-mod docutils literal notranslate"><span class="pre">test</span></code> — Regression tests package<a class="headerlink" href="#module-test">¶</a></h1>
<p>The test package contains all regression tests for Python.</p>
<p>This is additional documentation.</p>

<dl class="py class">
<dt class="sig sig-object py" id="test.TestCase">
<em class="property"><span class="pre">class</span></em>
<span class="sig-prename descclassname"><span class="pre">test.</span></span>
<span class="sig-name descname"><span class="pre">TestCase</span></span>
<span class="sig-paren">(</span><em class="sig-param"><span class="pre">methodName</span><span class="pre">=</span><span class="pre">'runTest'</span></em><span class="sig-paren">)</span>
<a class="headerlink" href="#test.TestCase">¶</a>
</dt>
<dd><p>A test case class.</p></dd>
</dl>

<dl class="py function">
<dt class="sig sig-object py" id="test.run_test">
<span class="sig-prename descclassname"><span class="pre">test.</span></span>
<span class="sig-name descname"><span class="pre">run_test</span></span>
<span class="sig-paren">(</span><em class="sig-param"><span class="pre">name</span></em>, <em class="sig-param"><span class="pre">verbose</span><span class="pre">=</span><span class="pre">False</span></em><span class="sig-paren">)</span>
<a class="headerlink" href="#test.run_test">¶</a>
</dt>
<dd><p>Run a single test.</p></dd>
</dl>

<dl class="py exception">
<dt class="sig sig-object py" id="test.TestFailed">
<em class="property"><span class="pre">exception</span></em>
<span class="sig-prename descclassname"><span class="pre">test.</span></span>
<span class="sig-name descname"><span class="pre">TestFailed</span></span>
<a class="headerlink" href="#test.TestFailed">¶</a>
</dt>
<dd><p>Exception raised when a test fails.</p></dd>
</dl>

<dl class="py data">
<dt class="sig sig-object py" id="test.verbose">
<span class="sig-prename descclassname"><span class="pre">test.</span></span>
<span class="sig-name descname"><span class="pre">verbose</span></span>
<a class="headerlink" href="#test.verbose">¶</a>
</dt>
<dd><p>True when verbose output is enabled.</p></dd>
</dl>

</section>
</div>
</body>
</html>
`
|
||||
|
||||
// testClassHTML is a class description containing two methods, one
// classmethod and one attribute nested in the class body.
const testClassHTML = `
<dl class="py class">
<dt class="sig sig-object py" id="test.TestCase">
<em class="property"><span class="pre">class</span></em>
<span class="sig-prename descclassname"><span class="pre">test.</span></span>
<span class="sig-name descname"><span class="pre">TestCase</span></span>
<span class="sig-paren">(</span><em class="sig-param"><span class="pre">methodName</span><span class="pre">=</span><span class="pre">'runTest'</span></em><span class="sig-paren">)</span>
</dt>
<dd>
<p>A test case class that provides testing functionality.</p>

<dl class="py method">
<dt class="sig sig-object py" id="test.TestCase.setUp">
<span class="sig-prename descclassname"><span class="pre">test.TestCase.</span></span>
<span class="sig-name descname"><span class="pre">setUp</span></span>
<span class="sig-paren">(</span><span class="sig-paren">)</span>
</dt>
<dd><p>Set up the test fixture.</p></dd>
</dl>

<dl class="py method">
<dt class="sig sig-object py" id="test.TestCase.tearDown">
<span class="sig-prename descclassname"><span class="pre">test.TestCase.</span></span>
<span class="sig-name descname"><span class="pre">tearDown</span></span>
<span class="sig-paren">(</span><span class="sig-paren">)</span>
</dt>
<dd><p>Tear down the test fixture.</p></dd>
</dl>

<dl class="py classmethod">
<dt class="sig sig-object py" id="test.TestCase.setUpClass">
<em class="property"><span class="pre">classmethod</span></em>
<span class="sig-prename descclassname"><span class="pre">test.TestCase.</span></span>
<span class="sig-name descname"><span class="pre">setUpClass</span></span>
<span class="sig-paren">(</span><span class="sig-paren">)</span>
</dt>
<dd><p>Set up the test class.</p></dd>
</dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="test.TestCase.maxDiff">
<span class="sig-prename descclassname"><span class="pre">test.TestCase.</span></span>
<span class="sig-name descname"><span class="pre">maxDiff</span></span>
</dt>
<dd><p>Maximum diff length.</p></dd>
</dl>

</dd>
</dl>
`
|
||||
|
||||
// testSearchHTML is a search results list with two object hits and one
// full-text hit.
const testSearchHTML = `
<ul class="search">
<li class="kind-object">
<a href="library/test.html#module-test" data-score="26">test</a>
<span>(Python module, in test — Regression tests package)</span>
</li>
<li class="kind-object">
<a href="library/unittest.html#module-unittest" data-score="21">unittest</a>
<span>(Python module, in unittest — Unit testing framework)</span>
</li>
<li class="kind-text">
<a href="library/keyword.html" data-score="15">keyword — Testing for Python keywords</a>
<p class="context">This module allows a Python program to determine if a string is a keyword.</p>
</li>
</ul>
`
|
||||
|
||||
func TestParseModulePage(t *testing.T) {
|
||||
parser := NewParser()
|
||||
module, err := parser.ParseModulePage(testModulePageHTML, "https://docs.python.org/3/library/test.html")
|
||||
if err != nil {
|
||||
t.Fatalf("ParseModulePage failed: %v", err)
|
||||
}
|
||||
|
||||
if module.Name == "" {
|
||||
t.Error("Expected non-empty module name")
|
||||
}
|
||||
|
||||
if module.Doc == "" {
|
||||
t.Error("Expected non-empty doc")
|
||||
}
|
||||
|
||||
if len(module.Classes) == 0 {
|
||||
t.Error("Expected at least one class")
|
||||
}
|
||||
|
||||
if len(module.Functions) == 0 {
|
||||
t.Error("Expected at least one function")
|
||||
}
|
||||
|
||||
if len(module.Exceptions) == 0 {
|
||||
t.Error("Expected at least one exception")
|
||||
}
|
||||
|
||||
if len(module.Constants) == 0 {
|
||||
t.Error("Expected at least one constant/data")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSearchResults(t *testing.T) {
|
||||
parser := NewParser()
|
||||
results, err := parser.ParseSearchResults(testSearchHTML)
|
||||
if err != nil {
|
||||
t.Fatalf("ParseSearchResults failed: %v", err)
|
||||
}
|
||||
|
||||
if len(results) < 2 {
|
||||
t.Fatalf("Expected at least 2 results, got %d", len(results))
|
||||
}
|
||||
|
||||
first := results[0]
|
||||
if first.Name == "" {
|
||||
t.Error("Expected non-empty name")
|
||||
}
|
||||
|
||||
if first.DocURL == "" {
|
||||
t.Error("Expected non-empty doc URL")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractClasses(t *testing.T) {
|
||||
parser := NewParser()
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(testClassHTML))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse HTML: %v", err)
|
||||
}
|
||||
|
||||
classes := parser.extractClasses(doc, "test", "https://docs.python.org/3/library/test.html")
|
||||
|
||||
if len(classes) == 0 {
|
||||
t.Fatal("Expected at least one class")
|
||||
}
|
||||
|
||||
tc := classes[0]
|
||||
if tc.Name == "" {
|
||||
t.Error("Expected non-empty class name")
|
||||
}
|
||||
|
||||
if len(tc.Methods) < 2 {
|
||||
t.Errorf("Expected at least 2 methods, got %d", len(tc.Methods))
|
||||
}
|
||||
|
||||
if len(tc.ClassMethods) == 0 {
|
||||
t.Error("Expected at least one classmethod")
|
||||
}
|
||||
|
||||
if len(tc.Attributes) == 0 {
|
||||
t.Error("Expected at least one attribute")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractFunctions(t *testing.T) {
|
||||
parser := NewParser()
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(testModulePageHTML))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse HTML: %v", err)
|
||||
}
|
||||
|
||||
functions := parser.extractFunctions(doc, "test", "https://docs.python.org/3/library/test.html")
|
||||
|
||||
if len(functions) == 0 {
|
||||
t.Fatal("Expected at least one function")
|
||||
}
|
||||
|
||||
fn := functions[0]
|
||||
if fn.Name == "" {
|
||||
t.Error("Expected non-empty function name")
|
||||
}
|
||||
|
||||
if fn.Signature == "" {
|
||||
t.Error("Expected non-empty signature")
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveURL(t *testing.T) {
|
||||
tests := []struct {
|
||||
base string
|
||||
href string
|
||||
expected string
|
||||
}{
|
||||
{"https://docs.python.org", "/library/test.html", "https://docs.python.org/library/test.html"},
|
||||
{"https://docs.python.org", "https://example.com/page", "https://example.com/page"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.href, func(t *testing.T) {
|
||||
got := resolveURL(tt.base, tt.href)
|
||||
if got != tt.expected {
|
||||
t.Errorf("resolveURL(%q, %q) = %q, want %q", tt.base, tt.href, got, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractPathFromURL(t *testing.T) {
|
||||
tests := []struct {
|
||||
href string
|
||||
expected string
|
||||
}{
|
||||
{"/library/test.html", "library/test"},
|
||||
{"library/test.html", "library/test"},
|
||||
{"/library/test.html#module-test", "library/test"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.href, func(t *testing.T) {
|
||||
got := extractPathFromURL(tt.href)
|
||||
if got != tt.expected {
|
||||
t.Errorf("extractPathFromURL(%q) = %q, want %q", tt.href, got, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,168 @@
|
||||
package pythondocs
|
||||
|
||||
import "time"
|
||||
|
||||
// Module is the parsed content of one module documentation page: its name and
// summary text, the members found on the page, the page URL, and the time the
// page was parsed.
type Module struct {
	Name       string       `json:"name"`
	Path       string       `json:"path"`
	Doc        string       `json:"doc,omitempty"`
	Synopsis   string       `json:"synopsis,omitempty"`
	Classes    []*Class     `json:"classes,omitempty"`
	Functions  []*Function  `json:"functions,omitempty"`
	Exceptions []*Exception `json:"exceptions,omitempty"`
	Constants  []*Data      `json:"constants,omitempty"`
	Submodules []string     `json:"submodules,omitempty"`
	DocURL     string       `json:"doc_url"`
	Version    string       `json:"version,omitempty"`
	FetchedAt  time.Time    `json:"fetched_at"`
}
|
||||
|
||||
// Package groups a set of modules together with package-level metadata.
// NOTE(review): nothing in this part of the codebase populates it — confirm
// where it is produced.
type Package struct {
	Name        string    `json:"name"`
	Version     string    `json:"version,omitempty"`
	Description string    `json:"description,omitempty"`
	Modules     []*Module `json:"modules,omitempty"`
	DocURL      string    `json:"doc_url"`
	Repository  string    `json:"repository,omitempty"`
	License     string    `json:"license,omitempty"`
	FetchedAt   time.Time `json:"fetched_at"`
}
|
||||
|
||||
// Class describes one documented class: its name and owning module, the
// signature and summary text, its base classes, and its members grouped by
// kind. DocURL points at the class entry on the documentation page.
type Class struct {
	Name          string       `json:"name"`
	Module        string       `json:"module"`
	QualName      string       `json:"qual_name"`
	Doc           string       `json:"doc,omitempty"`
	Signature     string       `json:"signature,omitempty"`
	Bases         []string     `json:"bases,omitempty"`
	Methods       []*Method    `json:"methods,omitempty"`
	ClassMethods  []*Method    `json:"classmethods,omitempty"`
	StaticMethods []*Method    `json:"staticmethods,omitempty"`
	Attributes    []*Attribute `json:"attributes,omitempty"`
	Properties    []*Property  `json:"properties,omitempty"`
	DocURL        string       `json:"doc_url"`
}
|
||||
|
||||
// Function describes one documented module-level function: its name and
// owning module, the signature text, summary and parameters. DocURL points at
// the function entry on the documentation page.
type Function struct {
	Name        string   `json:"name"`
	Module      string   `json:"module"`
	QualName    string   `json:"qual_name"`
	Doc         string   `json:"doc,omitempty"`
	Signature   string   `json:"signature"`
	Parameters  []*Param `json:"parameters,omitempty"`
	Returns     *Return  `json:"returns,omitempty"`
	Raises      []string `json:"raises,omitempty"`
	Examples    []string `json:"examples,omitempty"`
	DocURL      string   `json:"doc_url"`
	IsAsync     bool     `json:"is_async"`
	IsGenerator bool     `json:"is_generator"`
	Decorator   string   `json:"decorator,omitempty"`
}
|
||||
|
||||
// Method describes one documented method of a class: its name and owning
// class, the signature text, summary and parameters. IsClassMethod and
// IsStatic record which kind of method entry it was parsed from.
type Method struct {
	Name          string   `json:"name"`
	Class         string   `json:"class"`
	QualName      string   `json:"qual_name"`
	Doc           string   `json:"doc,omitempty"`
	Signature     string   `json:"signature"`
	Parameters    []*Param `json:"parameters,omitempty"`
	Returns       *Return  `json:"returns,omitempty"`
	Raises        []string `json:"raises,omitempty"`
	Examples      []string `json:"examples,omitempty"`
	DocURL        string   `json:"doc_url"`
	IsAsync       bool     `json:"is_async"`
	IsClassMethod bool     `json:"is_classmethod"`
	IsStatic      bool     `json:"is_static"`
	IsAbstract    bool     `json:"is_abstract"`
	IsProperty    bool     `json:"is_property"`
}
|
||||
|
||||
// Property describes a property of a class, referenced from Class.Properties.
// NOTE(review): nothing in this part of the codebase populates it — confirm
// where it is produced.
type Property struct {
	Name       string `json:"name"`
	Class      string `json:"class"`
	Doc        string `json:"doc,omitempty"`
	Type       string `json:"type,omitempty"`
	Getter     string `json:"getter,omitempty"`
	Setter     string `json:"setter,omitempty"`
	Deleter    string `json:"deleter,omitempty"`
	DocURL     string `json:"doc_url"`
	IsReadOnly bool   `json:"is_readonly"`
}
|
||||
|
||||
// Attribute describes one documented attribute of a class: its name, owning
// class and summary. DocURL points at the attribute entry on the
// documentation page.
type Attribute struct {
	Name   string `json:"name"`
	Class  string `json:"class"`
	Doc    string `json:"doc,omitempty"`
	Type   string `json:"type,omitempty"`
	Value  string `json:"value,omitempty"`
	DocURL string `json:"doc_url"`
}
|
||||
|
||||
// Exception describes one documented exception: its name and owning module,
// the signature text and summary. DocURL points at the exception entry on the
// documentation page.
type Exception struct {
	Name      string   `json:"name"`
	Module    string   `json:"module"`
	QualName  string   `json:"qual_name"`
	Doc       string   `json:"doc,omitempty"`
	Signature string   `json:"signature,omitempty"`
	Bases     []string `json:"bases,omitempty"`
	DocURL    string   `json:"doc_url"`
}
|
||||
|
||||
// Data describes one documented module-level data item (stored on
// Module.Constants): its name, owning module and summary. DocURL points at
// the item's entry on the documentation page.
type Data struct {
	Name    string `json:"name"`
	Module  string `json:"module"`
	Doc     string `json:"doc,omitempty"`
	Type    string `json:"type,omitempty"`
	Value   string `json:"value,omitempty"`
	DocURL  string `json:"doc_url"`
	IsConst bool   `json:"is_const"`
}
|
||||
|
||||
// Param describes one parameter of a function or method signature.
// IsVarArgs marks a "*name" parameter and IsKWArgs a "**name" parameter;
// Default holds the text following "=" in the signature, if any.
type Param struct {
	Name         string `json:"name"`
	Type         string `json:"type,omitempty"`
	Default      string `json:"default,omitempty"`
	Doc          string `json:"doc,omitempty"`
	IsPositional bool   `json:"is_positional"`
	IsKeyword    bool   `json:"is_keyword"`
	IsVarArgs    bool   `json:"is_varargs"`
	IsKWArgs     bool   `json:"is_kwargs"`
}
|
||||
|
||||
// Return describes the return value of a function or method, referenced from
// Function.Returns and Method.Returns.
type Return struct {
	Type string `json:"type,omitempty"`
	Doc  string `json:"doc,omitempty"`
}
|
||||
|
||||
// SearchResult is one entry of a parsed search results page. Kind is
// "object", "text" or "title" (or empty when the entry carries no kind
// class); DocURL is the absolute link and Path the document path derived
// from the entry's href.
type SearchResult struct {
	Name        string `json:"name"`
	Kind        string `json:"kind"`
	Path        string `json:"path"`
	Module      string `json:"module,omitempty"`
	Description string `json:"description,omitempty"`
	DocURL      string `json:"doc_url"`
	Score       int    `json:"score"`
}
|
||||
|
||||
// ItemKind names the category of a documented item, as used by Symbol.Kind.
type ItemKind string
|
||||
|
||||
// The ItemKind values defined by this package.
const (
	ItemKindModule    ItemKind = "module"
	ItemKindClass     ItemKind = "class"
	ItemKindFunction  ItemKind = "function"
	ItemKindMethod    ItemKind = "method"
	ItemKindException ItemKind = "exception"
	ItemKindData      ItemKind = "data"
	ItemKindAttribute ItemKind = "attribute"
	ItemKindProperty  ItemKind = "property"
)
|
||||
|
||||
// Symbol is a flat, kind-tagged description of a documented item.
// NOTE(review): nothing in this part of the codebase produces it — presumably
// used for indexing or lookup; confirm against callers.
type Symbol struct {
	Name      string   `json:"name"`
	Kind      ItemKind `json:"kind"`
	Module    string   `json:"module"`
	QualName  string   `json:"qual_name,omitempty"`
	Signature string   `json:"signature,omitempty"`
	Doc       string   `json:"doc,omitempty"`
	DocURL    string   `json:"doc_url"`
}
|
||||
Reference in New Issue
Block a user