first commit

This commit is contained in:
Tomas Dvorak
2026-02-22 10:42:17 +01:00
commit 55885a0e8f
239 changed files with 103690 additions and 0 deletions
+687
View File
@@ -0,0 +1,687 @@
package godocs
import (
"net/url"
"regexp"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// Parser parses pkg.go.dev HTML pages into structured documentation.
//
// Its only field is the site base URL, which ParseSearchResults prepends to
// the hrefs found in search snippets to build each result's URL.
type Parser struct {
// baseURL is the scheme-and-host prefix joined with snippet hrefs; NewParser
// sets it to "https://pkg.go.dev".
baseURL string
}
// NewParser returns a Parser whose base URL points at the public pkg.go.dev
// site.
func NewParser() *Parser {
	parser := new(Parser)
	parser.baseURL = "https://pkg.go.dev"
	return parser
}
// ParsePackagePage builds a Package from the HTML of a pkg.go.dev package
// documentation page.
//
// html is the raw page markup and docURL is the address it was fetched from;
// docURL is stored on the result and passed to the import-path extraction.
// The only error returned is the one goquery reports while reading the
// markup. Page elements that are absent leave the corresponding Package
// fields at their zero values.
func (p *Parser) ParsePackagePage(html string, docURL string) (*Package, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, err
	}

	// Every extractor only reads from doc, so the fields can be filled in a
	// single literal; FetchedAt is stamped before the extraction work starts.
	result := &Package{
		DocURL:     docURL,
		FetchedAt:  time.Now(),
		ImportPath: p.extractImportPath(doc, docURL),
		Name:       p.extractPackageName(doc),
		Synopsis:   p.extractSynopsis(doc),
		Doc:        p.extractPackageDoc(doc),
		Version:    p.extractVersion(doc),
		Module:     p.extractModule(doc),
		Licenses:   p.extractLicenses(doc),
		ImportedBy: p.extractImportedBy(doc),
		Repository: p.extractRepository(doc),
		Functions:  p.extractFunctions(doc),
		Types:      p.extractTypes(doc),
		Constants:  p.extractConstants(doc),
		Variables:  p.extractVariables(doc),
		Examples:   p.extractPackageExamples(doc),
	}
	return result, nil
}
// searchImportedByRE captures the (possibly comma-grouped) count that follows
// the "Imported by" label. The colon and the whitespace after the label are
// both optional so "Imported by 12", "Imported by: 12" and "Imported by:12"
// are all accepted.
var searchImportedByRE = regexp.MustCompile(`Imported by:?\s*(\d[\d,]*)`)

// searchVersionRE matches the first version-like token (1.2, v1.2 or v1.2.3).
var searchVersionRE = regexp.MustCompile(`v?\d+\.\d+(?:\.\d+)?`)

// ParseSearchResults parses a pkg.go.dev search results page.
//
// One SearchResult is produced per ".SearchSnippet" element, in document
// order. The only error returned is the one goquery reports while reading the
// markup; snippets with missing pieces simply leave those fields empty.
func (p *Parser) ParseSearchResults(html string) ([]*SearchResult, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, err
	}

	var results []*SearchResult
	doc.Find(".SearchSnippet").Each(func(_ int, s *goquery.Selection) {
		result := &SearchResult{}

		// Name, URL and a provisional path come from the title link.
		s.Find("h2 a").Each(func(_ int, a *goquery.Selection) {
			name := a.Text()
			// The import path may be rendered in a span nested inside the
			// link; drop its text so Name holds only the package name.
			if nested := a.Find(".SearchSnippet-header-path"); nested.Length() > 0 {
				name = strings.Replace(name, nested.Text(), "", 1)
			}
			result.Name = strings.TrimSpace(name)
			if href, exists := a.Attr("href"); exists {
				result.URL = p.baseURL + href
				result.Path = strings.TrimPrefix(href, "/")
			}
		})

		// The dedicated path span, when present, overrides the href-derived
		// path. Trim whitespace first so the surrounding parentheses are at
		// the edges when they are stripped.
		if pathSpan := s.Find(".SearchSnippet-header-path"); pathSpan.Length() > 0 {
			result.Path = strings.Trim(strings.TrimSpace(pathSpan.Text()), "()")
		}

		if synopsis := s.Find(".SearchSnippet-synopsis"); synopsis.Length() > 0 {
			result.Synopsis = strings.TrimSpace(synopsis.Text())
		}

		// The info label carries both the import count and the version.
		infoLabel := s.Find(".SearchSnippet-infoLabel").Text()
		if matches := searchImportedByRE.FindStringSubmatch(infoLabel); len(matches) > 1 {
			// parseCount skips the thousands separators.
			result.ImportedBy = parseCount(matches[1])
		}
		result.Version = searchVersionRE.FindString(infoLabel)

		if license := s.Find("[data-test-id='snippet-license'] a"); license.Length() > 0 {
			result.License = strings.TrimSpace(license.Text())
		}

		results = append(results, result)
	})
	return results, nil
}
// extractImportPath determines the package import path.
//
// The breadcrumb links are preferred: every link after the first (the
// "Discover Packages" entry) contributes one "/"-separated segment. When the
// breadcrumb yields nothing, the path component of docURL is used instead,
// with the leading slash and any "@version" suffix removed. An empty string
// is returned when neither source is usable.
func (p *Parser) extractImportPath(doc *goquery.Document, docURL string) string {
	var segments []string
	doc.Find(".go-Breadcrumb li a").Each(func(idx int, link *goquery.Selection) {
		if idx == 0 {
			return // the leading link is the site root, not part of the path
		}
		if label := strings.TrimSpace(link.Text()); label != "" {
			segments = append(segments, label)
		}
	})
	if len(segments) > 0 {
		return strings.Join(segments, "/")
	}

	if docURL == "" {
		return ""
	}
	parsed, err := url.Parse(docURL)
	if err != nil {
		return ""
	}
	trimmed := strings.TrimPrefix(parsed.Path, "/")
	// Cut "@v1.0.0"-style suffixes; an "@" at position 0 is left alone.
	if at := strings.Index(trimmed, "@"); at > 0 {
		trimmed = trimmed[:at]
	}
	return trimmed
}
// extractPackageName returns the trimmed text of the unit header title,
// falling back to the first <h1> on the page when that title is missing or
// blank.
func (p *Parser) extractPackageName(doc *goquery.Document) string {
	if heading := strings.TrimSpace(doc.Find(".UnitHeader-titleHeading").Text()); heading != "" {
		return heading
	}
	return strings.TrimSpace(doc.Find("h1").First().Text())
}
// extractSynopsis returns the package synopsis: the trimmed text of the first
// paragraph inside the first ".Documentation" element.
//
// Text longer than 200 bytes is shortened and terminated with "...". The cut
// is made on a rune boundary so a multi-byte UTF-8 character is never split,
// and the result never exceeds 200 bytes. An empty string is returned when
// the page has no such paragraph.
func (p *Parser) extractSynopsis(doc *goquery.Document) string {
	firstP := doc.Find(".Documentation").First().Find("p").First()
	if firstP.Length() == 0 {
		return ""
	}

	const (
		maxLen   = 200
		ellipsis = "..."
	)
	synopsis := strings.TrimSpace(firstP.Text())
	if len(synopsis) <= maxLen {
		return synopsis
	}

	// Ranging over a string yields the byte offset of each rune start, so cut
	// ends up as the last rune boundary at or before maxLen-len(ellipsis).
	cut := 0
	for i := range synopsis {
		if i > maxLen-len(ellipsis) {
			break
		}
		cut = i
	}
	return synopsis[:cut] + ellipsis
}
// extractPackageDoc gathers the text of every ".Documentation" element,
// normalizes its whitespace, and joins the non-empty results with a blank
// line between them.
func (p *Parser) extractPackageDoc(doc *goquery.Document) string {
	sections := doc.Find(".Documentation")
	chunks := make([]string, 0, sections.Length())
	sections.Each(func(_ int, section *goquery.Selection) {
		if cleaned := cleanWhitespace(section.Text()); cleaned != "" {
			chunks = append(chunks, cleaned)
		}
	})
	return strings.Join(chunks, "\n\n")
}
// extractVersion returns the trimmed text of the version link in the unit
// header, or "" when the page has no such link.
func (p *Parser) extractVersion(doc *goquery.Document) string {
	link := doc.Find("[data-test-id='UnitHeader-version'] a")
	if link.Length() == 0 {
		return ""
	}
	return strings.TrimSpace(link.Text())
}
// extractModule derives module information from the page.
//
// The module path is the text of the last breadcrumb link (ignoring the first
// one) that contains a "/"; the version is the text of the unit header
// version link. nil is returned when no breadcrumb link qualifies as a module
// path.
func (p *Parser) extractModule(doc *goquery.Document) *Module {
	var path string
	doc.Find(".go-Breadcrumb li a").Each(func(idx int, crumb *goquery.Selection) {
		if idx == 0 {
			return
		}
		// Later matches overwrite earlier ones, so the last one wins.
		if label := strings.TrimSpace(crumb.Text()); strings.Contains(label, "/") {
			path = label
		}
	})
	if path == "" {
		return nil
	}

	var version string
	if link := doc.Find("[data-test-id='UnitHeader-version'] a"); link.Length() > 0 {
		version = strings.TrimSpace(link.Text())
	}
	return &Module{Path: path, Version: version}
}
// extractLicenses returns one License per element tagged
// data-test-id="UnitHeader-license" that has non-blank text. The element's
// href attribute, when present, becomes the license Path.
func (p *Parser) extractLicenses(doc *goquery.Document) []License {
	var found []License
	doc.Find("[data-test-id='UnitHeader-license']").Each(func(_ int, el *goquery.Selection) {
		label := strings.TrimSpace(el.Text())
		if label == "" {
			return
		}
		entry := License{Name: label}
		if target, ok := el.Attr("href"); ok {
			entry.Path = target
		}
		found = append(found, entry)
	})
	return found
}
// importedByCountRE matches the first run of digits, with optional thousands
// separators, in the "Imported by" header link. Requiring a leading digit
// keeps a stray comma from being taken as the count.
var importedByCountRE = regexp.MustCompile(`\d[\d,]*`)

// extractImportedBy returns the number shown in the "Imported by" link of the
// unit header (for example 144729 for "Imported by: 144,729"). It returns 0
// when the link is missing or contains no digits.
func (p *Parser) extractImportedBy(doc *goquery.Document) int {
	link := doc.Find("[data-test-id='UnitHeader-importedby'] a")
	if link.Length() == 0 {
		return 0
	}
	// parseCount ignores the separators and yields 0 for an empty match.
	return parseCount(importedByCountRE.FindString(link.Text()))
}
// extractRepository returns the href of the repository link in the unit
// metadata, or "" when the link or its href attribute is absent.
func (p *Parser) extractRepository(doc *goquery.Document) string {
	link := doc.Find(".UnitMeta-repo a")
	if link.Length() == 0 {
		return ""
	}
	target, ok := link.Attr("href")
	if !ok {
		return ""
	}
	return target
}
// extractFunctions returns one Function per ".Documentation-function" element
// that has a non-empty name.
//
// The name is the header text with a leading "func" keyword removed, so a
// header reading "func DecodeInto" yields "DecodeInto". The signature is the
// text of the first <pre>, and the doc is the first paragraph of the function
// body (or, failing that, the first paragraph anywhere in the element).
func (p *Parser) extractFunctions(doc *goquery.Document) []*Function {
	var functions []*Function
	doc.Find(".Documentation-function").Each(func(_ int, s *goquery.Selection) {
		fn := &Function{}

		if header := s.Find(".Documentation-functionHeader").First(); header.Length() > 0 {
			name := strings.TrimSpace(header.Text())
			// Strip the keyword only when it is a separate leading token, so
			// an identifier such as "funcName" is left untouched.
			if tokens := strings.Fields(name); len(tokens) > 1 && tokens[0] == "func" {
				name = strings.TrimSpace(name[len("func"):])
			}
			fn.Name = name
		}

		if sig := s.Find("pre").First(); sig.Length() > 0 {
			fn.Signature = strings.TrimSpace(sig.Text())
		}

		docEl := s.Find(".Documentation-functionBody p").First()
		if docEl.Length() == 0 {
			docEl = s.Find("p").First()
		}
		if docEl.Length() > 0 {
			fn.Doc = strings.TrimSpace(docEl.Text())
		}

		fn.Examples = p.extractExamples(s)

		if fn.Name != "" {
			functions = append(functions, fn)
		}
	})
	return functions
}
// structKeywordRE and interfaceKeywordRE recognise a struct or interface
// literal regardless of the whitespace between the keyword and the brace;
// gofmt-formatted declarations read "struct {", not "struct{".
var (
	structKeywordRE    = regexp.MustCompile(`\bstruct\s*\{`)
	interfaceKeywordRE = regexp.MustCompile(`\binterface\s*\{`)
)

// extractTypes returns one Type per ".Documentation-type" element that has a
// non-empty name.
//
// The name is the header text with a leading "type" keyword removed. The
// first <pre> supplies the declaration text (stored in Underlying) and
// decides the Kind: struct, interface, or alias for anything else. Doc is the
// first paragraph in the element; methods and examples are collected from the
// same element.
func (p *Parser) extractTypes(doc *goquery.Document) []*Type {
	var types []*Type
	doc.Find(".Documentation-type").Each(func(_ int, s *goquery.Selection) {
		t := &Type{}

		if header := s.Find(".Documentation-typeHeader").First(); header.Length() > 0 {
			name := strings.TrimSpace(header.Text())
			// Strip the keyword only when it is a separate leading token.
			if tokens := strings.Fields(name); len(tokens) > 1 && tokens[0] == "type" {
				name = strings.TrimSpace(name[len("type"):])
			}
			t.Name = name
		}

		if sigEl := s.Find("pre").First(); sigEl.Length() > 0 {
			sig := sigEl.Text()
			t.Underlying = strings.TrimSpace(sig)
			switch {
			case structKeywordRE.MatchString(sig):
				t.Kind = TypeKindStruct
				t.Fields = p.extractStructFields(sigEl)
			case interfaceKeywordRE.MatchString(sig):
				t.Kind = TypeKindInterface
			default:
				t.Kind = TypeKindAlias
			}
		}

		if docEl := s.Find("p").First(); docEl.Length() > 0 {
			t.Doc = strings.TrimSpace(docEl.Text())
		}

		t.Methods = p.extractMethods(s)
		t.Examples = p.extractExamples(s)

		if t.Name != "" {
			types = append(types, t)
		}
	})
	return types
}
// extractStructFields reads struct fields from the elements inside sigEl that
// match "tr" or ".Documentation-structField".
//
// Each element's text is split on whitespace: the first token is the field
// name, the remaining tokens form the type, and a trailing back-quoted struct
// tag is moved into Tag. Elements that are blank or start with "//" are
// skipped. Exported is derived from the field name via isExported.
func (p *Parser) extractStructFields(sigEl *goquery.Selection) []*Field {
	var fields []*Field
	sigEl.Find("tr, .Documentation-structField").Each(func(_ int, s *goquery.Selection) {
		text := strings.TrimSpace(s.Text())
		if text == "" || strings.HasPrefix(text, "//") {
			return
		}

		parts := strings.Fields(text)
		if len(parts) == 0 {
			return
		}
		field := &Field{
			Name:     parts[0],
			Exported: isExported(parts[0]),
		}
		if len(parts) > 1 {
			field.Type = strings.Join(parts[1:], " ")
			if idx := strings.Index(field.Type, "`"); idx > 0 {
				field.Tag = field.Type[idx:]
				// Trim the separator that sat between the type and the tag so
				// Type does not end in a space.
				field.Type = strings.TrimSpace(field.Type[:idx])
			}
		}
		fields = append(fields, field)
	})
	return fields
}
// extractMethods returns one Method per ".Documentation-method" or
// ".Documentation-function" element inside typeSection that has a non-empty
// name.
//
// The header text is parsed after dropping a leading "func" keyword: a
// parenthesised prefix becomes Receiver and the remainder becomes Name, so
// "func (*Codec) Encode" yields Receiver "*Codec" and Name "Encode". The
// signature is the first <pre> and the doc is the first paragraph.
func (p *Parser) extractMethods(typeSection *goquery.Selection) []*Method {
	var methods []*Method
	typeSection.Find(".Documentation-method, .Documentation-function").Each(func(_ int, s *goquery.Selection) {
		m := &Method{}

		header := s.Find(".Documentation-functionHeader, .Documentation-methodHeader").First()
		if header.Length() > 0 {
			name := strings.TrimSpace(header.Text())
			// Without this the receiver check below never fires for headers
			// that begin with the keyword.
			if tokens := strings.Fields(name); len(tokens) > 1 && tokens[0] == "func" {
				name = strings.TrimSpace(name[len("func"):])
			}
			if strings.HasPrefix(name, "(") {
				if end := strings.Index(name, ")"); end > 0 {
					m.Receiver = name[1:end]
					name = strings.TrimSpace(name[end+1:])
				}
			}
			m.Name = name
		}

		if sig := s.Find("pre").First(); sig.Length() > 0 {
			m.Signature = strings.TrimSpace(sig.Text())
		}

		if docEl := s.Find("p").First(); docEl.Length() > 0 {
			m.Doc = strings.TrimSpace(docEl.Text())
		}

		if m.Name != "" {
			methods = append(methods, m)
		}
	})
	return methods
}
// extractConstants returns one Value per ".Documentation-constants" element
// whose first <pre> declares at least one constant with an "=".
//
// Every "name = value" line contributes to Names: a leading "const" keyword
// is dropped, comma-separated names are split, and a declared type is kept
// out of the name. Name is the first declared name; Type and Value describe
// the last such line. Blank lines, "//" comment lines and lines without "="
// (such as "const (" or bare iota continuation names) are ignored. Doc is the
// first paragraph in the element.
func (p *Parser) extractConstants(doc *goquery.Document) []*Value {
	var constants []*Value
	doc.Find(".Documentation-constants").Each(func(_ int, s *goquery.Selection) {
		codeEl := s.Find("pre").First()
		if codeEl.Length() == 0 {
			return
		}

		v := &Value{IsConst: true}
		for _, line := range strings.Split(codeEl.Text(), "\n") {
			line = strings.TrimSpace(line)
			if line == "" || strings.HasPrefix(line, "//") {
				continue
			}
			eq := strings.Index(line, "=")
			if eq < 0 {
				continue
			}

			// Left-hand side: optional "const", then "A", "A Type" or "A, B".
			lhs := line[:eq]
			if tokens := strings.Fields(lhs); len(tokens) > 0 && tokens[0] == "const" {
				lhs = lhs[len("const"):]
			}

			declared := false
			lineType := ""
			for _, piece := range strings.Split(lhs, ",") {
				tokens := strings.Fields(piece)
				if len(tokens) == 0 {
					continue
				}
				v.Names = append(v.Names, tokens[0])
				if v.Name == "" {
					v.Name = tokens[0]
				}
				if len(tokens) > 1 {
					lineType = strings.Join(tokens[1:], " ")
				}
				declared = true
			}
			if declared {
				// Keep Type and Value in step: both describe this line.
				v.Type = lineType
				v.Value = strings.TrimSpace(line[eq+1:])
			}
		}

		if docEl := s.Find("p").First(); docEl.Length() > 0 {
			v.Doc = strings.TrimSpace(docEl.Text())
		}

		if len(v.Names) > 0 {
			constants = append(constants, v)
		}
	})
	return constants
}
// extractVariables returns one Value per ".Documentation-variables" element
// whose first <pre> declares a variable.
//
// Only the first declaration in the block is recorded: its first token is the
// Name, any remaining tokens before "=" are the Type, and the text after "="
// is the Value. A leading "var" keyword, the "(" and ")" lines of a grouped
// declaration, blank lines and "//" comment lines are skipped, so a grouped
// or multi-line declaration never ends up named after a closing bracket. Doc
// is the first paragraph in the element.
func (p *Parser) extractVariables(doc *goquery.Document) []*Value {
	var variables []*Value
	doc.Find(".Documentation-variables").Each(func(_ int, s *goquery.Selection) {
		codeEl := s.Find("pre").First()
		if codeEl.Length() == 0 {
			return
		}

		v := &Value{IsConst: false}
		for _, line := range strings.Split(codeEl.Text(), "\n") {
			line = strings.TrimSpace(line)
			if tokens := strings.Fields(line); len(tokens) > 0 && tokens[0] == "var" {
				line = strings.TrimSpace(line[len("var"):])
			}
			if line == "" || line == "(" || line == ")" || strings.HasPrefix(line, "//") {
				continue
			}

			decl, value := line, ""
			if eq := strings.Index(line, "="); eq >= 0 {
				decl, value = line[:eq], strings.TrimSpace(line[eq+1:])
			}
			tokens := strings.Fields(decl)
			if len(tokens) == 0 {
				// Nothing precedes the "="; not a declaration line.
				continue
			}
			// A multi-name declaration ("a, b int") leaves a comma on the
			// first token.
			v.Name = strings.TrimSuffix(tokens[0], ",")
			v.Type = strings.Join(tokens[1:], " ")
			v.Value = value
			break
		}

		if docEl := s.Find("p").First(); docEl.Length() > 0 {
			v.Doc = strings.TrimSpace(docEl.Text())
		}

		if v.Name != "" {
			variables = append(variables, v)
		}
	})
	return variables
}
// extractExamples collects the ".Documentation-example" elements found inside
// section. Each example takes its name from the example header, its code from
// the first <pre> or <code>, its output from the example output element and
// its doc from the first paragraph. Examples without code are dropped.
func (p *Parser) extractExamples(section *goquery.Selection) []*Example {
	var collected []*Example
	section.Find(".Documentation-example").Each(func(_ int, node *goquery.Selection) {
		// firstText yields the trimmed text of the first element matching
		// selector, or "" when nothing matches.
		firstText := func(selector string) string {
			return strings.TrimSpace(node.Find(selector).First().Text())
		}

		code := firstText("pre, code")
		if code == "" {
			return
		}
		collected = append(collected, &Example{
			Name:   firstText(".Documentation-exampleHeader"),
			Code:   code,
			Output: firstText(".Documentation-exampleOutput"),
			Doc:    firstText("p"),
		})
	})
	return collected
}
// extractPackageExamples collects every ".Documentation-example" element in
// the document. Name, Code and Output are filled in (Doc is not), and
// examples without code are dropped.
func (p *Parser) extractPackageExamples(doc *goquery.Document) []*Example {
	var collected []*Example
	doc.Find(".Documentation-example").Each(func(_ int, node *goquery.Selection) {
		code := strings.TrimSpace(node.Find("pre, code").First().Text())
		if code == "" {
			return
		}

		header := node.Find(".Documentation-exampleHeader").First()
		output := node.Find(".Documentation-exampleOutput").First()
		collected = append(collected, &Example{
			Name:   strings.TrimSpace(header.Text()),
			Code:   code,
			Output: strings.TrimSpace(output.Text()),
		})
	})
	return collected
}
// parseCount folds the ASCII digits of s into an int, skipping every other
// character (thousands separators, letters, spaces). A string without digits
// yields 0.
func parseCount(s string) int {
	total := 0
	for i := 0; i < len(s); i++ {
		digit := s[i]
		if digit < '0' || digit > '9' {
			continue
		}
		total = total*10 + int(digit-'0')
	}
	return total
}
// isExported reports whether name begins with an ASCII uppercase letter
// (A-Z). The empty string is not exported.
func isExported(name string) bool {
	return name != "" && 'A' <= name[0] && name[0] <= 'Z'
}
// whitespaceRunRE matches one or more consecutive whitespace characters. It
// is compiled once at package scope rather than on every call.
var whitespaceRunRE = regexp.MustCompile(`\s+`)

// cleanWhitespace collapses every run of whitespace in text to a single space
// and trims the result.
func cleanWhitespace(text string) string {
	return strings.TrimSpace(whitespaceRunRE.ReplaceAllString(text, " "))
}
+268
View File
@@ -0,0 +1,268 @@
package godocs
import (
"strings"
"testing"
"github.com/PuerkitoBio/goquery"
)
// testPackageHTML is a hand-written, trimmed-down package page used by
// TestParsePackagePage. It contains a four-link breadcrumb, a title heading,
// header details (version, imported-by count, license), a two-paragraph
// documentation section, one function, one struct type with one method, a
// two-entry const group and a single variable.
//
// NOTE(review): the license span is tagged data-test-id="UnitHeader-licenses",
// while extractLicenses selects "UnitHeader-license", so this fixture does not
// appear to exercise license extraction — confirm which spelling is intended.
const testPackageHTML = `
<!DOCTYPE html>
<html>
<head><title>runtime - pkg.go.dev</title></head>
<body>
<nav class="go-Breadcrumb">
<ol>
<li><a href="/">Discover Packages</a></li>
<li><a href="/k8s.io/apimachinery">k8s.io/apimachinery</a></li>
<li><a href="/k8s.io/apimachinery/pkg">pkg</a></li>
<li><a href="/k8s.io/apimachinery/pkg/runtime">runtime</a></li>
</ol>
</nav>
<h1 class="UnitHeader-titleHeading">runtime</h1>
<div class="go-Main-headerDetails">
<span data-test-id="UnitHeader-version"><a href="?tab=versions">v0.35.1</a></span>
<span data-test-id="UnitHeader-importedby"><a href="?tab=importedby">Imported by: 144,729</a></span>
<span data-test-id="UnitHeader-licenses"><a href="?tab=licenses">Apache-2.0</a></span>
</div>
<div class="Documentation">
<p>Package runtime defines conversions between generic types and structs to map query strings to struct objects.</p>
<p>This is additional documentation text for the package.</p>
</div>
<div class="Documentation-function">
<div class="Documentation-functionHeader">func DecodeInto</div>
<pre>func DecodeInto(d Decoder, data []byte, into Object) error</pre>
<p>DecodeInto is a helper function that decodes the given data into the provided object.</p>
</div>
<div class="Documentation-type">
<div class="Documentation-typeHeader">type Codec</div>
<pre>type Codec struct {
Encoder Encoder
Decoder Decoder
}</pre>
<p>Codec is a struct that holds an encoder and decoder.</p>
<div class="Documentation-method">
<div class="Documentation-methodHeader">func (*Codec) Encode</div>
<pre>func (c *Codec) Encode(obj Object) ([]byte, error)</pre>
<p>Encode encodes the given object.</p>
</div>
</div>
<div class="Documentation-constants">
<pre>const (
ContentTypeJSON = "application/json"
ContentTypeYAML = "application/yaml"
)</pre>
<p>Content types for different formats.</p>
</div>
<div class="Documentation-variables">
<pre>var DefaultScheme = NewScheme()</pre>
<p>DefaultScheme is the default scheme used for encoding/decoding.</p>
</div>
</body>
</html>
`
// testSearchHTML is a hand-written search results page used by
// TestParseSearchResults. It holds two ".SearchSnippet" entries; each has a
// title link with the import path in a nested span, a synopsis paragraph and
// an info label carrying the imported-by count, the version with its publish
// date, and the license.
//
// NOTE(review): the license links here are not wrapped in an element tagged
// data-test-id='snippet-license', which is what ParseSearchResults selects,
// so License presumably stays empty for this fixture — confirm.
const testSearchHTML = `
<!DOCTYPE html>
<html>
<body>
<div class="SearchSnippet">
<h2><a href="/k8s.io/apimachinery/pkg/runtime">runtime <span class="SearchSnippet-header-path">(k8s.io/apimachinery/pkg/runtime)</span></a></h2>
<p class="SearchSnippet-synopsis">Package runtime defines conversions between generic types and structs.</p>
<div class="SearchSnippet-infoLabel">
<a href="?tab=importedby">Imported by: <strong>144,729</strong></a>
<span>v0.35.1 published on <strong>Dec 4, 2025</strong></span>
<a href="?tab=licenses">Apache-2.0</a>
</div>
</div>
<div class="SearchSnippet">
<h2><a href="/github.com/google/go-querystring/query">query <span class="SearchSnippet-header-path">(github.com/google/go-querystring/query)</span></a></h2>
<p class="SearchSnippet-synopsis">Package query implements encoding of structs into URL query parameters.</p>
<div class="SearchSnippet-infoLabel">
<a href="?tab=importedby">Imported by: <strong>5,111</strong></a>
<span>v1.2.0 published on <strong>Nov 10, 2025</strong></span>
<a href="?tab=licenses">BSD-3-Clause</a>
</div>
</div>
</body>
</html>
`
// TestParsePackagePage parses the package fixture and checks the header
// fields exactly and the extracted collections for being non-empty.
func TestParsePackagePage(t *testing.T) {
	const pageURL = "https://pkg.go.dev/k8s.io/apimachinery/pkg/runtime"

	pkg, err := NewParser().ParsePackagePage(testPackageHTML, pageURL)
	if err != nil {
		t.Fatalf("ParsePackagePage failed: %v", err)
	}

	// Header fields with exact expectations.
	if got := pkg.Name; got != "runtime" {
		t.Errorf("Expected name 'runtime', got '%s'", got)
	}
	if got := pkg.ImportPath; got != "k8s.io/apimachinery/pkg/runtime" {
		t.Errorf("Expected import path 'k8s.io/apimachinery/pkg/runtime', got '%s'", got)
	}
	if got := pkg.Version; got != "v0.35.1" {
		t.Errorf("Expected version 'v0.35.1', got '%s'", got)
	}
	if got := pkg.ImportedBy; got != 144729 {
		t.Errorf("Expected imported by 144729, got %d", got)
	}

	// Everything else only has to be present.
	presence := []struct {
		missing bool
		message string
	}{
		{pkg.Synopsis == "", "Expected non-empty synopsis"},
		{len(pkg.Functions) == 0, "Expected at least one function"},
		{len(pkg.Types) == 0, "Expected at least one type"},
		{len(pkg.Constants) == 0, "Expected at least one constant"},
		{len(pkg.Variables) == 0, "Expected at least one variable"},
	}
	for _, check := range presence {
		if check.missing {
			t.Error(check.message)
		}
	}
}
// TestParseSearchResults parses the search fixture and checks that at least
// two results come back and that the first one has a synopsis, path and URL.
func TestParseSearchResults(t *testing.T) {
	results, err := NewParser().ParseSearchResults(testSearchHTML)
	if err != nil {
		t.Fatalf("ParseSearchResults failed: %v", err)
	}
	if count := len(results); count < 2 {
		t.Fatalf("Expected at least 2 results, got %d", count)
	}

	top := results[0]
	required := []struct {
		value   string
		message string
	}{
		{top.Synopsis, "Expected non-empty synopsis"},
		{top.Path, "Expected non-empty path"},
		{top.URL, "Expected non-empty URL"},
	}
	for _, field := range required {
		if field.value == "" {
			t.Error(field.message)
		}
	}
}
// TestIsExported runs isExported against a table of identifier spellings, one
// subtest per name.
func TestIsExported(t *testing.T) {
	type testCase struct {
		name     string
		expected bool
	}
	cases := []testCase{
		{name: "Exported", expected: true},
		{name: "unexported", expected: false},
		{name: "", expected: false},
		{name: "CamelCase", expected: true},
		{name: "camelCase", expected: false},
		{name: "X", expected: true},
		{name: "x", expected: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := isExported(tc.name)
			if got == tc.expected {
				return
			}
			t.Errorf("isExported(%q) = %v, want %v", tc.name, got, tc.expected)
		})
	}
}
// TestCleanWhitespace runs cleanWhitespace against a table of inputs, one
// subtest per input string.
func TestCleanWhitespace(t *testing.T) {
	type testCase struct {
		input    string
		expected string
	}
	cases := []testCase{
		{input: " hello world ", expected: "hello world"},
		{input: "single", expected: "single"},
		{input: "multiple spaces here", expected: "multiple spaces here"},
		{input: "\n\ttabs\t\n", expected: "tabs"},
	}

	for _, tc := range cases {
		t.Run(tc.input, func(t *testing.T) {
			got := cleanWhitespace(tc.input)
			if got == tc.expected {
				return
			}
			t.Errorf("cleanWhitespace(%q) = %q, want %q", tc.input, got, tc.expected)
		})
	}
}
// TestParseCount runs parseCount against plain, comma-grouped, non-numeric
// and empty inputs, one subtest per input string.
func TestParseCount(t *testing.T) {
	type testCase struct {
		input    string
		expected int
	}
	cases := []testCase{
		{input: "144729", expected: 144729},
		{input: "5,111", expected: 5111},
		{input: "0", expected: 0},
		{input: "1,234,567", expected: 1234567},
		{input: "abc", expected: 0},
		{input: "", expected: 0},
	}

	for _, tc := range cases {
		t.Run(tc.input, func(t *testing.T) {
			got := parseCount(tc.input)
			if got == tc.expected {
				return
			}
			t.Errorf("parseCount(%q) = %d, want %d", tc.input, got, tc.expected)
		})
	}
}
// TestExtractImportPath covers both sources of the import path: a multi-link
// breadcrumb, and the URL fallback (with its "@version" suffix removed) used
// when the breadcrumb has only its first link.
func TestExtractImportPath(t *testing.T) {
	type testCase struct {
		html     string
		url      string
		expected string
	}
	cases := []testCase{
		{
			html:     `<nav class="go-Breadcrumb"><li><a href="/">Discover</a></li><li><a href="/k8s.io/apimachinery">k8s.io/apimachinery</a></li><li><a href="/k8s.io/apimachinery/pkg">pkg</a></li><li><a href="/k8s.io/apimachinery/pkg/runtime">runtime</a></li></nav>`,
			url:      "https://pkg.go.dev/k8s.io/apimachinery/pkg/runtime",
			expected: "k8s.io/apimachinery/pkg/runtime",
		},
		{
			html:     `<nav class="go-Breadcrumb"><li><a href="/github.com/user/repo">github.com/user/repo</a></li></nav>`,
			url:      "https://pkg.go.dev/github.com/user/repo@v1.0.0",
			expected: "github.com/user/repo",
		},
	}

	parser := NewParser()
	for _, tc := range cases {
		t.Run(tc.expected, func(t *testing.T) {
			doc, err := goquery.NewDocumentFromReader(strings.NewReader(tc.html))
			if err != nil {
				t.Fatalf("Failed to parse HTML: %v", err)
			}
			if got := parser.extractImportPath(doc, tc.url); got != tc.expected {
				t.Errorf("extractImportPath() = %q, want %q", got, tc.expected)
			}
		})
	}
}
+156
View File
@@ -0,0 +1,156 @@
// Package godocs provides parsing and extraction for Go package documentation
// from pkg.go.dev and similar documentation sites.
package godocs
import "time"
// Package represents a Go package's documentation as extracted from a single
// documentation page. ParsePackagePage fills every field; values the page
// does not provide are left at their zero value.
type Package struct {
// ImportPath is the full import path (e.g., "github.com/user/repo/pkg"),
// taken from the page breadcrumb or, failing that, from the page URL.
ImportPath string `json:"import_path"`
// Name is the package name as shown in the page's title heading
// (presumably the last element of the import path — verify for renamed
// packages).
Name string `json:"name"`
// Synopsis is a short description: the first documentation paragraph,
// capped at 200 bytes.
Synopsis string `json:"synopsis"`
// Doc is the whitespace-normalized text of the documentation sections,
// joined with blank lines.
Doc string `json:"doc"`
// Version is the version string shown in the page header.
Version string `json:"version"`
// Module describes the containing module; nil when no module path was found.
Module *Module `json:"module,omitempty"`
// Licenses lists the licenses shown in the page header.
Licenses []License `json:"licenses,omitempty"`
// Functions holds the function declarations found on the page.
Functions []*Function `json:"functions,omitempty"`
// Types holds the type declarations found on the page.
Types []*Type `json:"types,omitempty"`
// Constants holds one entry per constant declaration block.
Constants []*Value `json:"constants,omitempty"`
// Variables holds one entry per variable declaration block.
Variables []*Value `json:"variables,omitempty"`
// Examples holds the examples found anywhere on the page.
Examples []*Example `json:"examples,omitempty"`
// ImportedBy is the "Imported by" count from the page header; 0 when absent.
ImportedBy int `json:"imported_by"`
// Repository is the source repository URL.
Repository string `json:"repository,omitempty"`
// DocURL is the URL the documentation page was fetched from, as passed to
// the parser.
DocURL string `json:"doc_url"`
// FetchedAt is the time at which the page was parsed.
FetchedAt time.Time `json:"fetched_at"`
}
// Module represents Go module information.
type Module struct {
// Path is the module path, taken from a breadcrumb entry containing a "/".
Path string `json:"path"`
// Version is the version shown in the page header; it may be empty.
Version string `json:"version"`
}
// License represents license information.
type License struct {
// Name is the license label as displayed (e.g. "Apache-2.0").
Name string `json:"name"`
// Path is the href of the license element; omitted when the element has none.
Path string `json:"path,omitempty"`
}
// Function represents a function declaration.
type Function struct {
// Name is taken from the function's header element.
Name string `json:"name"`
// Doc is the first documentation paragraph for the function.
Doc string `json:"doc,omitempty"`
// Signature is the text of the function's first code block.
Signature string `json:"signature"`
// Examples holds the examples nested in the function's section.
Examples []*Example `json:"examples,omitempty"`
}
// Type represents a type declaration.
type Type struct {
// Name is taken from the type's header element.
Name string `json:"name"`
// Doc is the first paragraph found in the type's section.
Doc string `json:"doc,omitempty"`
// Kind classifies the declaration; see the TypeKind constants.
Kind TypeKind `json:"kind"`
Underlying string `json:"underlying,omitempty"` // Trimmed text of the declaration's code block; the parser sets it for every kind, not only aliases
Fields []*Field `json:"fields,omitempty"` // Populated for structs only
// Methods holds the methods and functions nested in the type's section.
Methods []*Method `json:"methods,omitempty"`
// Examples holds the examples nested in the type's section.
Examples []*Example `json:"examples,omitempty"`
}
// TypeKind represents the kind of type. The values are the strings emitted in
// JSON output.
type TypeKind string
// Type kinds. The parser in this package assigns only the struct, interface
// and alias kinds; alias is its fallback for any declaration that is neither
// a struct nor an interface. NOTE(review): basic and func are not assigned by
// the parser code visible here — confirm whether another producer uses them.
const (
TypeKindBasic TypeKind = "basic"
TypeKindStruct TypeKind = "struct"
TypeKindInterface TypeKind = "interface"
TypeKindAlias TypeKind = "alias"
TypeKindFunc TypeKind = "func"
)
// Field represents a struct field.
type Field struct {
// Name is the first whitespace-separated token of the field's text.
Name string `json:"name"`
// Type is the remainder of the field's text, up to any struct tag.
Type string `json:"type"`
// Doc is the field's documentation. NOTE(review): not populated by the
// parser code visible in this package.
Doc string `json:"doc,omitempty"`
// Tag is the back-quoted struct tag, including the back quotes.
Tag string `json:"tag,omitempty"`
// Embedded marks an embedded field. NOTE(review): not populated by the
// parser code visible in this package.
Embedded bool `json:"embedded,omitempty"`
// Exported reports whether Name starts with an uppercase ASCII letter.
Exported bool `json:"exported"`
}
// Method represents a method on a type.
type Method struct {
// Name is the method name from its header, without any receiver prefix
// the parser recognised.
Name string `json:"name"`
// Doc is the first paragraph found in the method's section.
Doc string `json:"doc,omitempty"`
// Signature is the text of the method's first code block.
Signature string `json:"signature"`
// Receiver is the text between the parentheses of a receiver prefix in
// the header, e.g. "t *Type"; empty when the header had no such prefix.
Receiver string `json:"receiver,omitempty"`
}
// Value represents a constant or variable declaration block. The parser
// produces one Value per declaration block, so a grouped declaration is
// summarized by a single Value.
type Value struct {
// Name is the first name declared in the block.
Name string `json:"name"`
// Doc is the first paragraph found in the block's section.
Doc string `json:"doc,omitempty"`
// Type is the declared type, when one was parsed.
Type string `json:"type,omitempty"`
// Value is the text to the right of "=". For a const group it is the
// value of the last "name = value" line.
Value string `json:"value,omitempty"`
Names []string `json:"names,omitempty"` // All names declared in a const group; not set for variables
// IsConst is true for constants and false for variables.
IsConst bool `json:"is_const"`
}
// Example represents a code example. The parser keeps only examples whose
// Code is non-empty.
type Example struct {
// Name is the text of the example's header.
Name string `json:"name"`
// Doc is the first paragraph inside the example; left empty for
// package-level examples.
Doc string `json:"doc,omitempty"`
// Code is the text of the example's first <pre> or <code> element.
Code string `json:"code"`
// Output is the text of the example's output element, if any.
Output string `json:"output,omitempty"`
// PlayURL is a link to run the example. NOTE(review): not populated by
// the parser code visible in this package.
PlayURL string `json:"play_url,omitempty"`
}
// SearchResult represents a search result from pkg.go.dev.
type SearchResult struct {
// Name is taken from the text of the result's title link.
Name string `json:"name"`
// Path is the import path: the text of the path span with its surrounding
// parentheses removed, or the link href without its leading "/" when the
// span is missing.
Path string `json:"path"`
// Synopsis is the result's one-line description.
Synopsis string `json:"synopsis"`
// ImportedBy is the "Imported by" count; 0 when it could not be parsed.
ImportedBy int `json:"imported_by"`
// Version is the first version-like token found in the result's info label.
Version string `json:"version"`
// Published is the publish date. NOTE(review): not populated by the
// parser code visible in this package.
Published string `json:"published"`
// License is the text of the result's license link.
License string `json:"license"`
// URL is the parser's base URL followed by the title link's href.
URL string `json:"url"`
}
// Symbol represents a symbol (function, type, etc.) within a package.
// NOTE(review): nothing in the parser code visible here constructs a Symbol;
// the field meanings below follow the declaration's own comments.
type Symbol struct {
// Name is the symbol's identifier.
Name string `json:"name"`
Kind string `json:"kind"` // function, type, constant, variable
// Signature is the symbol's declaration text, when available.
Signature string `json:"signature,omitempty"`
// Doc is the symbol's documentation text, when available.
Doc string `json:"doc,omitempty"`
Recv string `json:"recv,omitempty"` // For methods
}