Files
Devour/pkg/pythondocs/parser_test.go
T
Tomas Dvorak 55885a0e8f first commit
2026-02-22 10:42:17 +01:00

282 lines
8.3 KiB
Go

package pythondocs
import (
"strings"
"testing"
"github.com/PuerkitoBio/goquery"
)
// testModulePageHTML is an HTML fixture for a single module page. Inside
// div.body it holds one section (id "module-test") with an <h1> title, two
// paragraphs of prose, and four definition lists: a "py class" (test.TestCase),
// a "py function" (test.run_test), a "py exception" (test.TestFailed) and a
// "py data" entry (test.verbose). It is consumed by TestParseModulePage and
// TestExtractFunctions.
// NOTE(review): the markup appears modelled on docs.python.org library pages —
// confirm against a live page if the parser's selectors change.
const testModulePageHTML = `
<!DOCTYPE html>
<html>
<body>
<div class="body" role="main">
<section id="module-test">
<h1><code class="xref py py-mod docutils literal notranslate"><span class="pre">test</span></code> — Regression tests package<a class="headerlink" href="#module-test">¶</a></h1>
<p>The test package contains all regression tests for Python.</p>
<p>This is additional documentation.</p>
<dl class="py class">
<dt class="sig sig-object py" id="test.TestCase">
<em class="property"><span class="pre">class</span></em>
<span class="sig-prename descclassname"><span class="pre">test.</span></span>
<span class="sig-name descname"><span class="pre">TestCase</span></span>
<span class="sig-paren">(</span><em class="sig-param"><span class="pre">methodName</span><span class="pre">=</span><span class="pre">'runTest'</span></em><span class="sig-paren">)</span>
<a class="headerlink" href="#test.TestCase">¶</a>
</dt>
<dd><p>A test case class.</p></dd>
</dl>
<dl class="py function">
<dt class="sig sig-object py" id="test.run_test">
<span class="sig-prename descclassname"><span class="pre">test.</span></span>
<span class="sig-name descname"><span class="pre">run_test</span></span>
<span class="sig-paren">(</span><em class="sig-param"><span class="pre">name</span></em>, <em class="sig-param"><span class="pre">verbose</span><span class="pre">=</span><span class="pre">False</span></em><span class="sig-paren">)</span>
<a class="headerlink" href="#test.run_test">¶</a>
</dt>
<dd><p>Run a single test.</p></dd>
</dl>
<dl class="py exception">
<dt class="sig sig-object py" id="test.TestFailed">
<em class="property"><span class="pre">exception</span></em>
<span class="sig-prename descclassname"><span class="pre">test.</span></span>
<span class="sig-name descname"><span class="pre">TestFailed</span></span>
<a class="headerlink" href="#test.TestFailed">¶</a>
</dt>
<dd><p>Exception raised when a test fails.</p></dd>
</dl>
<dl class="py data">
<dt class="sig sig-object py" id="test.verbose">
<span class="sig-prename descclassname"><span class="pre">test.</span></span>
<span class="sig-name descname"><span class="pre">verbose</span></span>
<a class="headerlink" href="#test.verbose">¶</a>
</dt>
<dd><p>True when verbose output is enabled.</p></dd>
</dl>
</section>
</div>
</body>
</html>
`
// testClassHTML is an HTML fragment holding a single "py class" definition
// list for test.TestCase. Its <dd> nests a description paragraph plus four
// member definition lists: two "py method" entries (setUp, tearDown), one
// "py classmethod" entry (setUpClass) and one "py attribute" entry (maxDiff).
// It is consumed by TestExtractClasses.
const testClassHTML = `
<dl class="py class">
<dt class="sig sig-object py" id="test.TestCase">
<em class="property"><span class="pre">class</span></em>
<span class="sig-prename descclassname"><span class="pre">test.</span></span>
<span class="sig-name descname"><span class="pre">TestCase</span></span>
<span class="sig-paren">(</span><em class="sig-param"><span class="pre">methodName</span><span class="pre">=</span><span class="pre">'runTest'</span></em><span class="sig-paren">)</span>
</dt>
<dd>
<p>A test case class that provides testing functionality.</p>
<dl class="py method">
<dt class="sig sig-object py" id="test.TestCase.setUp">
<span class="sig-prename descclassname"><span class="pre">test.TestCase.</span></span>
<span class="sig-name descname"><span class="pre">setUp</span></span>
<span class="sig-paren">(</span><span class="sig-paren">)</span>
</dt>
<dd><p>Set up the test fixture.</p></dd>
</dl>
<dl class="py method">
<dt class="sig sig-object py" id="test.TestCase.tearDown">
<span class="sig-prename descclassname"><span class="pre">test.TestCase.</span></span>
<span class="sig-name descname"><span class="pre">tearDown</span></span>
<span class="sig-paren">(</span><span class="sig-paren">)</span>
</dt>
<dd><p>Tear down the test fixture.</p></dd>
</dl>
<dl class="py classmethod">
<dt class="sig sig-object py" id="test.TestCase.setUpClass">
<em class="property"><span class="pre">classmethod</span></em>
<span class="sig-prename descclassname"><span class="pre">test.TestCase.</span></span>
<span class="sig-name descname"><span class="pre">setUpClass</span></span>
<span class="sig-paren">(</span><span class="sig-paren">)</span>
</dt>
<dd><p>Set up the test class.</p></dd>
</dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="test.TestCase.maxDiff">
<span class="sig-prename descclassname"><span class="pre">test.TestCase.</span></span>
<span class="sig-name descname"><span class="pre">maxDiff</span></span>
</dt>
<dd><p>Maximum diff length.</p></dd>
</dl>
</dd>
</dl>
`
// testSearchHTML is an HTML fragment of a search-results list (ul.search)
// with three items: two "kind-object" entries linking to module anchors
// (test, unittest), each followed by a <span> summary, and one "kind-text"
// entry (keyword) followed by a p.context snippet. Every link carries a
// data-score attribute. It is consumed by TestParseSearchResults.
const testSearchHTML = `
<ul class="search">
<li class="kind-object">
<a href="library/test.html#module-test" data-score="26">test</a>
<span>(Python module, in test — Regression tests package)</span>
</li>
<li class="kind-object">
<a href="library/unittest.html#module-unittest" data-score="21">unittest</a>
<span>(Python module, in unittest — Unit testing framework)</span>
</li>
<li class="kind-text">
<a href="library/keyword.html" data-score="15">keyword — Testing for Python keywords</a>
<p class="context">This module allows a Python program to determine if a string is a keyword.</p>
</li>
</ul>
`
// TestParseModulePage runs the module-page fixture through ParseModulePage
// and reports an error for every top-level field of the result that comes
// back empty: the name, the doc text, and the class, function, exception and
// constant collections.
func TestParseModulePage(t *testing.T) {
	const pageURL = "https://docs.python.org/3/library/test.html"

	p := NewParser()
	mod, parseErr := p.ParseModulePage(testModulePageHTML, pageURL)
	if parseErr != nil {
		t.Fatalf("ParseModulePage failed: %v", parseErr)
	}

	// Each entry pairs a "field is empty" condition with the message to
	// report; the order matches the order in which failures are emitted.
	checks := []struct {
		failed bool
		msg    string
	}{
		{failed: mod.Name == "", msg: "Expected non-empty module name"},
		{failed: mod.Doc == "", msg: "Expected non-empty doc"},
		{failed: len(mod.Classes) == 0, msg: "Expected at least one class"},
		{failed: len(mod.Functions) == 0, msg: "Expected at least one function"},
		{failed: len(mod.Exceptions) == 0, msg: "Expected at least one exception"},
		{failed: len(mod.Constants) == 0, msg: "Expected at least one constant/data"},
	}
	for _, c := range checks {
		if c.failed {
			t.Error(c.msg)
		}
	}
}
// TestParseSearchResults parses the search-results fixture and requires at
// least two results; it then checks that the first result has both a name
// and a documentation URL.
func TestParseSearchResults(t *testing.T) {
	p := NewParser()
	hits, parseErr := p.ParseSearchResults(testSearchHTML)

	// Both conditions are fatal: without two results there is nothing
	// meaningful to inspect below.
	switch {
	case parseErr != nil:
		t.Fatalf("ParseSearchResults failed: %v", parseErr)
	case len(hits) < 2:
		t.Fatalf("Expected at least 2 results, got %d", len(hits))
	}

	top := hits[0]
	if top.Name == "" {
		t.Error("Expected non-empty name")
	}
	if top.DocURL == "" {
		t.Error("Expected non-empty doc URL")
	}
}
// TestExtractClasses loads the class fixture into a goquery document, runs
// extractClasses over it, and checks that the first class returned has a
// name, at least two methods, at least one classmethod and at least one
// attribute.
func TestExtractClasses(t *testing.T) {
	const pageURL = "https://docs.python.org/3/library/test.html"

	p := NewParser()
	root, loadErr := goquery.NewDocumentFromReader(strings.NewReader(testClassHTML))
	if loadErr != nil {
		t.Fatalf("Failed to parse HTML: %v", loadErr)
	}

	found := p.extractClasses(root, "test", pageURL)
	if len(found) == 0 {
		t.Fatal("Expected at least one class")
	}

	cls := found[0]
	if cls.Name == "" {
		t.Error("Expected non-empty class name")
	}
	if n := len(cls.Methods); n < 2 {
		t.Errorf("Expected at least 2 methods, got %d", n)
	}
	if len(cls.ClassMethods) == 0 {
		t.Error("Expected at least one classmethod")
	}
	if len(cls.Attributes) == 0 {
		t.Error("Expected at least one attribute")
	}
}
// TestExtractFunctions loads the module-page fixture into a goquery document,
// runs extractFunctions over it, and checks that the first function returned
// has both a name and a signature.
func TestExtractFunctions(t *testing.T) {
	const pageURL = "https://docs.python.org/3/library/test.html"

	p := NewParser()
	root, loadErr := goquery.NewDocumentFromReader(strings.NewReader(testModulePageHTML))
	if loadErr != nil {
		t.Fatalf("Failed to parse HTML: %v", loadErr)
	}

	found := p.extractFunctions(root, "test", pageURL)
	if len(found) == 0 {
		t.Fatal("Expected at least one function")
	}

	// An empty name falls through to the signature check, so both fields
	// are always examined and both failures can be reported.
	head := found[0]
	switch {
	case head.Name == "":
		t.Error("Expected non-empty function name")
		fallthrough
	default:
		if head.Signature == "" {
			t.Error("Expected non-empty signature")
		}
	}
}
// TestResolveURL is a table-driven check of resolveURL. The table pairs a
// base URL and an href with the URL expected back: one root-relative href
// expected to be appended to the base, and one absolute href expected to be
// returned as-is. Each case runs as a subtest named after its href.
func TestResolveURL(t *testing.T) {
	type resolveCase struct {
		base, href, want string
	}

	cases := []resolveCase{
		{
			base: "https://docs.python.org",
			href: "/library/test.html",
			want: "https://docs.python.org/library/test.html",
		},
		{
			base: "https://docs.python.org",
			href: "https://example.com/page",
			want: "https://example.com/page",
		},
	}

	for i := range cases {
		c := cases[i]
		t.Run(c.href, func(t *testing.T) {
			if got := resolveURL(c.base, c.href); got != c.want {
				t.Errorf("resolveURL(%q, %q) = %q, want %q", c.base, c.href, got, c.want)
			}
		})
	}
}
// TestExtractPathFromURL is a table-driven check of extractPathFromURL. All
// three inputs (with a leading slash, without one, and with a #fragment) are
// expected to yield "library/test". Each case runs as a subtest named after
// its href.
func TestExtractPathFromURL(t *testing.T) {
	cases := []struct {
		href string
		want string
	}{
		{href: "/library/test.html", want: "library/test"},
		{href: "library/test.html", want: "library/test"},
		{href: "/library/test.html#module-test", want: "library/test"},
	}

	for i := range cases {
		c := cases[i]
		t.Run(c.href, func(t *testing.T) {
			if got := extractPathFromURL(c.href); got != c.want {
				t.Errorf("extractPathFromURL(%q) = %q, want %q", c.href, got, c.want)
			}
		})
	}
}