mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-03 20:13:03 +00:00
first commit
This commit is contained in:
@@ -0,0 +1,310 @@
|
||||
package springdocs
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// Parser converts raw Spring documentation HTML into the data types of this
// package. Create one with NewParser.
type Parser struct {
	// baseURL is the site root used to resolve links found in search results.
	baseURL string
}
|
||||
|
||||
func NewParser() *Parser {
|
||||
return &Parser{
|
||||
baseURL: "https://docs.spring.io",
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Parser) ParseModulePage(html string, docURL string) (*Module, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
module := &Module{
|
||||
DocURL: docURL,
|
||||
FetchedAt: time.Now(),
|
||||
}
|
||||
|
||||
module.Name = p.extractModuleName(doc)
|
||||
module.Doc = p.extractModuleDoc(doc)
|
||||
module.Version = p.extractVersion(doc)
|
||||
module.Classes = p.extractClasses(doc, module.Name, docURL)
|
||||
module.Properties = p.extractProperties(doc, docURL)
|
||||
module.Guides = p.extractGuides(doc, docURL)
|
||||
|
||||
return module, nil
|
||||
}
|
||||
|
||||
func (p *Parser) ParseSearchResults(html string) ([]*SearchResult, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var results []*SearchResult
|
||||
|
||||
doc.Find(".search-result, .ais-Hits-item, article").Each(func(i int, s *goquery.Selection) {
|
||||
result := &SearchResult{}
|
||||
|
||||
link := s.Find("a").First()
|
||||
result.Name = strings.TrimSpace(link.Text())
|
||||
|
||||
if href, exists := link.Attr("href"); exists {
|
||||
result.DocURL = resolveURL(p.baseURL, href)
|
||||
}
|
||||
|
||||
result.Doc = strings.TrimSpace(s.Find(".summary, .description, p").First().Text())
|
||||
|
||||
if strings.Contains(result.DocURL, "/api/") {
|
||||
result.Kind = "class"
|
||||
} else if strings.Contains(result.DocURL, "/guides/") || strings.Contains(result.DocURL, "/tutorial/") {
|
||||
result.Kind = "guide"
|
||||
} else {
|
||||
result.Kind = "doc"
|
||||
}
|
||||
|
||||
results = append(results, result)
|
||||
})
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (p *Parser) extractModuleName(doc *goquery.Document) string {
|
||||
title := doc.Find("h1, .title, .page-title").First().Text()
|
||||
title = strings.TrimSpace(title)
|
||||
|
||||
if title != "" {
|
||||
if idx := strings.Index(title, " "); idx > 0 {
|
||||
return title[:idx]
|
||||
}
|
||||
return title
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (p *Parser) extractModuleDoc(doc *goquery.Document) string {
|
||||
docblock := doc.Find(".paragraph:first-child p, .lead, #content p:first-of-type").First()
|
||||
return strings.TrimSpace(docblock.Text())
|
||||
}
|
||||
|
||||
func (p *Parser) extractVersion(doc *goquery.Document) string {
|
||||
versionEl := doc.Find(".version, .doc-version, [data-version]")
|
||||
return strings.TrimSpace(versionEl.Text())
|
||||
}
|
||||
|
||||
func (p *Parser) extractClasses(doc *goquery.Document, moduleName string, docURL string) []*Class {
|
||||
var classes []*Class
|
||||
|
||||
doc.Find("table.table tbody tr, .api-list a, .class-link").Each(func(_ int, s *goquery.Selection) {
|
||||
class := &Class{}
|
||||
|
||||
link := s.Find("a")
|
||||
if link.Length() == 0 {
|
||||
link = s
|
||||
}
|
||||
|
||||
class.Name = strings.TrimSpace(link.Text())
|
||||
|
||||
if href, exists := link.Attr("href"); exists {
|
||||
class.DocURL = resolveURL(docURL, href)
|
||||
if strings.Contains(href, "/api/") {
|
||||
class.QualifiedName = extractQualifiedName(href)
|
||||
}
|
||||
}
|
||||
|
||||
class.Doc = strings.TrimSpace(s.Find(".description, td:last-child").Text())
|
||||
|
||||
if class.Name != "" {
|
||||
classes = append(classes, class)
|
||||
}
|
||||
})
|
||||
|
||||
return classes
|
||||
}
|
||||
|
||||
func (p *Parser) extractProperties(doc *goquery.Document, docURL string) []*Property {
|
||||
var properties []*Property
|
||||
|
||||
doc.Find(".configuration-property, table.properties tbody tr, .config-props dt").Each(func(_ int, s *goquery.Selection) {
|
||||
prop := &Property{}
|
||||
|
||||
nameEl := s.Find(".property-name, code, strong, td:first-child").First()
|
||||
prop.Name = strings.TrimSpace(nameEl.Text())
|
||||
|
||||
prop.Type = strings.TrimSpace(s.Find(".property-type, .type").Text())
|
||||
prop.Default = strings.TrimSpace(s.Find(".default-value, .default").Text())
|
||||
prop.Doc = strings.TrimSpace(s.Find(".description, dd, td:last-child").Text())
|
||||
|
||||
if prop.Name != "" {
|
||||
properties = append(properties, prop)
|
||||
}
|
||||
})
|
||||
|
||||
return properties
|
||||
}
|
||||
|
||||
func (p *Parser) extractGuides(doc *goquery.Document, docURL string) []*Guide {
|
||||
var guides []*Guide
|
||||
|
||||
doc.Find(".guide-link, .tutorial-link, .guide-card").Each(func(_ int, s *goquery.Selection) {
|
||||
guide := &Guide{}
|
||||
|
||||
link := s.Find("a")
|
||||
if link.Length() == 0 {
|
||||
link = s
|
||||
}
|
||||
|
||||
guide.Title = strings.TrimSpace(link.Text())
|
||||
|
||||
if href, exists := link.Attr("href"); exists {
|
||||
guide.DocURL = resolveURL(docURL, href)
|
||||
}
|
||||
|
||||
guide.Description = strings.TrimSpace(s.Find(".summary, .description").Text())
|
||||
|
||||
if guide.Title != "" {
|
||||
guides = append(guides, guide)
|
||||
}
|
||||
})
|
||||
|
||||
return guides
|
||||
}
|
||||
|
||||
func (p *Parser) ParseClassPage(html string, docURL string) (*Class, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
class := &Class{
|
||||
DocURL: docURL,
|
||||
}
|
||||
|
||||
header := doc.Find("h1, .title, .class-name").First()
|
||||
class.Name = strings.TrimSpace(header.Text())
|
||||
class.QualifiedName = class.Name
|
||||
|
||||
class.Doc = strings.TrimSpace(doc.Find(".class-description, .javadoc, .class-comment").First().Text())
|
||||
|
||||
class.Methods = p.extractClassMethods(doc, class.Name, docURL)
|
||||
class.Fields = p.extractClassFields(doc, class.Name, docURL)
|
||||
class.Constructors = p.extractClassConstructors(doc, class.Name, docURL)
|
||||
|
||||
return class, nil
|
||||
}
|
||||
|
||||
func (p *Parser) extractClassMethods(doc *goquery.Document, className string, docURL string) []*Method {
|
||||
var methods []*Method
|
||||
|
||||
doc.Find("table.method-summary tbody tr, .method, .member").Each(func(_ int, s *goquery.Selection) {
|
||||
method := &Method{
|
||||
IsConstructor: false,
|
||||
}
|
||||
|
||||
link := s.Find("a").First()
|
||||
method.Name = strings.TrimSpace(link.Text())
|
||||
|
||||
if method.Name == "" {
|
||||
sig := s.Find(".method-signature, code").Text()
|
||||
method.Name = extractSpringMethodName(sig)
|
||||
}
|
||||
|
||||
method.Signature = strings.TrimSpace(s.Find(".method-signature, code").Text())
|
||||
method.Doc = strings.TrimSpace(s.Find(".method-description, td:last-child, dd").Text())
|
||||
|
||||
if href, exists := link.Attr("href"); exists {
|
||||
if strings.HasPrefix(href, "#") {
|
||||
method.DocURL = docURL + href
|
||||
} else {
|
||||
method.DocURL = resolveURL(docURL, href)
|
||||
}
|
||||
method.QualifiedName = className + "." + method.Name
|
||||
}
|
||||
|
||||
if method.Name != "" {
|
||||
methods = append(methods, method)
|
||||
}
|
||||
})
|
||||
|
||||
return methods
|
||||
}
|
||||
|
||||
func (p *Parser) extractClassFields(doc *goquery.Document, className string, docURL string) []*Field {
|
||||
var fields []*Field
|
||||
|
||||
doc.Find("table.field-summary tbody tr, .field").Each(func(_ int, s *goquery.Selection) {
|
||||
field := &Field{}
|
||||
|
||||
field.Name = strings.TrimSpace(s.Find(".field-name, a, td:first-child").Text())
|
||||
field.Type = strings.TrimSpace(s.Find(".field-type, td:nth-child(2)").Text())
|
||||
field.Doc = strings.TrimSpace(s.Find(".field-description, td:last-child").Text())
|
||||
|
||||
if field.Name != "" {
|
||||
fields = append(fields, field)
|
||||
}
|
||||
})
|
||||
|
||||
return fields
|
||||
}
|
||||
|
||||
func (p *Parser) extractClassConstructors(doc *goquery.Document, className string, docURL string) []*Method {
|
||||
var constructors []*Method
|
||||
|
||||
doc.Find("table.constructor-summary tbody tr, .constructor").Each(func(_ int, s *goquery.Selection) {
|
||||
ctor := &Method{
|
||||
IsConstructor: true,
|
||||
Name: className,
|
||||
}
|
||||
|
||||
ctor.Signature = strings.TrimSpace(s.Find(".constructor-signature, code").Text())
|
||||
ctor.Doc = strings.TrimSpace(s.Find(".constructor-description, td:last-child").Text())
|
||||
|
||||
constructors = append(constructors, ctor)
|
||||
})
|
||||
|
||||
return constructors
|
||||
}
|
||||
|
||||
// extractSpringMethodName pulls the method name out of a signature string:
// the last whitespace-separated word before the first "(". It returns "" when
// the signature has no "(" or when nothing precedes it.
func extractSpringMethodName(sig string) string {
	trimmed := strings.TrimSpace(sig)

	paren := strings.Index(trimmed, "(")
	if paren <= 0 {
		return ""
	}

	words := strings.Fields(trimmed[:paren])
	if len(words) == 0 {
		return ""
	}
	return words[len(words)-1]
}
|
||||
|
||||
// extractQualifiedName derives a dotted class name from an API documentation
// link.
//
// Any query string or fragment and a trailing "/" are removed first. When the
// path contains "/api/", everything after its first occurrence is taken as the
// class path: a ".html" suffix is stripped and "/" separators become ".", so
// ".../api/org/springframework/boot/SpringApplication.html" yields
// "org.springframework.boot.SpringApplication". Without an "/api/" segment
// the last path segment, minus ".html", is returned. An href with no "/" at
// all yields "".
//
// NOTE(review): if a site layout puts extra segments between "/api/" and the
// package path, they end up in the result; confirm against the pages crawled.
func extractQualifiedName(href string) string {
	// Anchors and query strings are not part of the class path.
	if cut := strings.IndexAny(href, "?#"); cut >= 0 {
		href = href[:cut]
	}
	href = strings.TrimSuffix(href, "/")

	const apiMarker = "/api/"
	if at := strings.Index(href, apiMarker); at >= 0 {
		classPath := strings.TrimSuffix(href[at+len(apiMarker):], ".html")
		return strings.ReplaceAll(classPath, "/", ".")
	}

	if slash := strings.LastIndex(href, "/"); slash >= 0 {
		return strings.TrimSuffix(href[slash+1:], ".html")
	}
	return ""
}
|
||||
|
||||
// resolveURL turns href into an absolute URL using base as the reference
// point.
//
// An href that already carries a scheme is returned unchanged. Any other href
// (root-relative, document-relative, fragment-only, protocol-relative) is
// resolved against base. When either href or base cannot be parsed, href is
// returned as-is so the caller still has the raw link.
//
// Absoluteness is decided by parsing rather than by an "http" prefix test, so
// relative paths that merely start with those letters, such as
// "http-clients.html", are resolved like any other relative link.
func resolveURL(base string, href string) string {
	hrefURL, err := url.Parse(href)
	if err != nil {
		return href
	}
	if hrefURL.IsAbs() {
		return href
	}

	baseURL, err := url.Parse(base)
	if err != nil {
		return href
	}

	return baseURL.ResolveReference(hrefURL).String()
}
|
||||
@@ -0,0 +1,103 @@
|
||||
package springdocs
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// testModulePageHTML is a minimal module page used by the tests in this file.
// It contains a heading, a lead paragraph, a one-row class table and a single
// configuration property.
const testModulePageHTML = `
<!DOCTYPE html>
<html>
<body>
<h1>Spring Boot Reference</h1>
<p class="lead">Spring Boot makes it easy to create stand-alone, production-grade Spring based Applications.</p>

<table class="table">
<tbody>
<tr>
<td><a href="/spring-boot/docs/current/api/org/springframework/boot/SpringApplication.html">SpringApplication</a></td>
<td>Class used to bootstrap and launch a Spring application.</td>
</tr>
</tbody>
</table>

<div class="configuration-property">
<dt><code>server.port</code></dt>
<dd>Server HTTP port.</dd>
</div>
</body>
</html>
`
|
||||
|
||||
func TestParseModulePage(t *testing.T) {
|
||||
parser := NewParser()
|
||||
module, err := parser.ParseModulePage(testModulePageHTML, "https://docs.spring.io/spring-boot/docs/current/reference/html/")
|
||||
if err != nil {
|
||||
t.Fatalf("ParseModulePage failed: %v", err)
|
||||
}
|
||||
|
||||
if module.Name == "" {
|
||||
t.Error("Expected non-empty module name")
|
||||
}
|
||||
|
||||
if module.Doc == "" {
|
||||
t.Error("Expected non-empty doc")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractClasses(t *testing.T) {
|
||||
parser := NewParser()
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(testModulePageHTML))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse HTML: %v", err)
|
||||
}
|
||||
|
||||
classes := parser.extractClasses(doc, "spring-boot", "https://docs.spring.io/test")
|
||||
|
||||
if len(classes) == 0 {
|
||||
t.Fatal("Expected at least one class")
|
||||
}
|
||||
|
||||
first := classes[0]
|
||||
if first.Name == "" {
|
||||
t.Error("Expected non-empty class name")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractProperties(t *testing.T) {
|
||||
parser := NewParser()
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(testModulePageHTML))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse HTML: %v", err)
|
||||
}
|
||||
|
||||
props := parser.extractProperties(doc, "https://docs.spring.io/test")
|
||||
|
||||
if len(props) == 0 {
|
||||
t.Skip("No properties extracted from test HTML")
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveURL(t *testing.T) {
|
||||
tests := []struct {
|
||||
base string
|
||||
href string
|
||||
expected string
|
||||
}{
|
||||
{"https://docs.spring.io", "/api/TestClass.html", "https://docs.spring.io/api/TestClass.html"},
|
||||
{"https://docs.spring.io", "https://example.com/page", "https://example.com/page"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.href, func(t *testing.T) {
|
||||
got := resolveURL(tt.base, tt.href)
|
||||
if got != tt.expected {
|
||||
t.Errorf("resolveURL(%q, %q) = %q, want %q", tt.base, tt.href, got, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,102 @@
|
||||
// Package springdocs provides parsing and extraction for Spring Boot documentation
|
||||
// from docs.spring.io.
|
||||
package springdocs
|
||||
|
||||
import "time"
|
||||
|
||||
// Module represents a Spring module's documentation.
|
||||
type Module struct {
|
||||
Name string `json:"name"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Version string `json:"version,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
Packages []*Package `json:"packages,omitempty"`
|
||||
Classes []*Class `json:"classes,omitempty"`
|
||||
Guides []*Guide `json:"guides,omitempty"`
|
||||
Properties []*Property `json:"properties,omitempty"`
|
||||
FetchedAt time.Time `json:"fetched_at"`
|
||||
}
|
||||
|
||||
// Package represents a Spring package.
|
||||
type Package struct {
|
||||
Name string `json:"name"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Classes []*Class `json:"classes,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
}
|
||||
|
||||
// Class represents a Spring class.
|
||||
type Class struct {
|
||||
QualifiedName string `json:"qualified_name"`
|
||||
Name string `json:"name"`
|
||||
Package string `json:"package"`
|
||||
Kind string `json:"kind"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Annotations []string `json:"annotations,omitempty"`
|
||||
SuperClass string `json:"super_class,omitempty"`
|
||||
Interfaces []string `json:"interfaces,omitempty"`
|
||||
Fields []*Field `json:"fields,omitempty"`
|
||||
Methods []*Method `json:"methods,omitempty"`
|
||||
Constructors []*Method `json:"constructors,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
Since string `json:"since,omitempty"`
|
||||
}
|
||||
|
||||
// Field describes one field of a class: its name, declared type, summary,
// annotations and documentation URL.
type Field struct {
	Name        string   `json:"name"`
	Type        string   `json:"type"`
	Doc         string   `json:"doc,omitempty"`
	Annotations []string `json:"annotations,omitempty"`
	DocURL      string   `json:"doc_url"`
}
|
||||
|
||||
// Method represents a method.
|
||||
type Method struct {
|
||||
Name string `json:"name"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Annotations []string `json:"annotations,omitempty"`
|
||||
ReturnType string `json:"return_type,omitempty"`
|
||||
Parameters []*Parameter `json:"parameters,omitempty"`
|
||||
Signature string `json:"signature"`
|
||||
QualifiedName string `json:"qualified_name"`
|
||||
DocURL string `json:"doc_url"`
|
||||
IsConstructor bool `json:"is_constructor"`
|
||||
}
|
||||
|
||||
// Parameter describes one parameter of a method: its name, type and summary.
type Parameter struct {
	Name string `json:"name"`
	Type string `json:"type"`
	Doc  string `json:"doc,omitempty"`
}
|
||||
|
||||
// Guide describes one Spring guide or tutorial: its title, description,
// documentation URL, tags and difficulty level.
type Guide struct {
	Title       string   `json:"title"`
	Description string   `json:"description,omitempty"`
	DocURL      string   `json:"doc_url"`
	Tags        []string `json:"tags,omitempty"`
	Level       string   `json:"level,omitempty"` // beginner, intermediate, advanced
}
|
||||
|
||||
// Property describes one Spring configuration property: its name, value,
// type, summary, default value, documentation URL and deprecation note.
type Property struct {
	Name       string `json:"name"`
	Value      string `json:"value,omitempty"`
	Type       string `json:"type,omitempty"`
	Doc        string `json:"doc,omitempty"`
	Default    string `json:"default,omitempty"`
	DocURL     string `json:"doc_url"`
	Deprecated string `json:"deprecated,omitempty"`
}
|
||||
|
||||
// SearchResult describes one hit from a documentation search: its name, kind,
// owning module, summary, documentation URL and relevance score.
type SearchResult struct {
	Name   string `json:"name"`
	Kind   string `json:"kind"` // class, method, property, guide
	Module string `json:"module,omitempty"`
	Doc    string `json:"doc,omitempty"`
	DocURL string `json:"doc_url"`
	Score  int    `json:"score"`
}
|
||||
Reference in New Issue
Block a user