mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-03 20:13:03 +00:00
356 lines
8.9 KiB
Go
356 lines
8.9 KiB
Go
package javadocs
|
|
|
|
import (
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
"github.com/yourorg/devour/pkg/parserutil"
|
|
)
|
|
|
|
// Parser converts documentation HTML into this package's Package, Class and
// SearchResult values.
type Parser struct {
	// baseURL is the base against which hrefs found in search results are
	// resolved.
	baseURL string
}
|
|
|
|
func NewParser() *Parser {
|
|
return &Parser{
|
|
baseURL: "https://docs.oracle.com",
|
|
}
|
|
}
|
|
|
|
func (p *Parser) ParsePackagePage(html string, docURL string) (*Package, error) {
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
pkg := &Package{
|
|
DocURL: docURL,
|
|
FetchedAt: time.Now(),
|
|
}
|
|
|
|
pkg.Name = p.extractPackageName(doc)
|
|
pkg.Doc = p.extractPackageDoc(doc)
|
|
pkg.Classes = p.extractClasses(doc, pkg.Name, docURL)
|
|
pkg.Interfaces = p.extractInterfaces(doc, pkg.Name, docURL)
|
|
pkg.Enums = p.extractEnums(doc, pkg.Name, docURL)
|
|
pkg.Exceptions = p.extractExceptions(doc, pkg.Name, docURL)
|
|
|
|
return pkg, nil
|
|
}
|
|
|
|
func (p *Parser) ParseSearchResults(html string) ([]*SearchResult, error) {
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var results []*SearchResult
|
|
|
|
doc.Find(".result").Each(func(i int, s *goquery.Selection) {
|
|
result := &SearchResult{}
|
|
|
|
link := s.Find("a").First()
|
|
result.Name = strings.TrimSpace(link.Text())
|
|
|
|
if href, exists := link.Attr("href"); exists {
|
|
result.DocURL = resolveURL(p.baseURL, href)
|
|
}
|
|
|
|
result.Kind = s.Find(".result-kind").Text()
|
|
result.QualName = s.Find(".qualified-name").Text()
|
|
result.Package = s.Find(".package").Text()
|
|
result.Doc = strings.TrimSpace(s.Find(".description").Text())
|
|
|
|
results = append(results, result)
|
|
})
|
|
|
|
return results, nil
|
|
}
|
|
|
|
func (p *Parser) extractPackageName(doc *goquery.Document) string {
|
|
title := doc.Find("h1, .title").First().Text()
|
|
title = strings.TrimSpace(title)
|
|
|
|
if strings.Contains(title, "Package") {
|
|
parts := strings.Fields(title)
|
|
for i, part := range parts {
|
|
if part == "Package" && i+1 < len(parts) {
|
|
return parts[i+1]
|
|
}
|
|
}
|
|
}
|
|
|
|
if title != "" {
|
|
return title
|
|
}
|
|
|
|
return ""
|
|
}
|
|
|
|
func (p *Parser) extractPackageDoc(doc *goquery.Document) string {
|
|
docblock := doc.Find(".block, .description, #package-description").First()
|
|
return strings.TrimSpace(docblock.Text())
|
|
}
|
|
|
|
func (p *Parser) extractClasses(doc *goquery.Document, pkgName string, docURL string) []*Class {
|
|
var classes []*Class
|
|
|
|
doc.Find("table.type-summary tr, .class-summary .member, section.class tbody tr").Each(func(_ int, s *goquery.Selection) {
|
|
class := &Class{
|
|
Package: pkgName,
|
|
Kind: ClassKindClass,
|
|
}
|
|
|
|
link := s.Find("a").First()
|
|
class.Name = strings.TrimSpace(link.Text())
|
|
|
|
if class.Name == "" {
|
|
class.Name = strings.TrimSpace(s.Find(".member-name, td:first-child").Text())
|
|
}
|
|
|
|
if href, exists := link.Attr("href"); exists {
|
|
class.DocURL = resolveURL(docURL, href)
|
|
class.QualifiedName = pkgName + "." + class.Name
|
|
}
|
|
|
|
class.Doc = strings.TrimSpace(s.Find(".member-summary, td:last-child").Text())
|
|
|
|
if class.Name != "" && !strings.Contains(class.Name, "interface") {
|
|
classes = append(classes, class)
|
|
}
|
|
})
|
|
|
|
return classes
|
|
}
|
|
|
|
func (p *Parser) extractInterfaces(doc *goquery.Document, pkgName string, docURL string) []*Class {
|
|
var interfaces []*Class
|
|
|
|
doc.Find("table.interface-summary tr, .interface-summary .member").Each(func(_ int, s *goquery.Selection) {
|
|
iface := &Class{
|
|
Package: pkgName,
|
|
Kind: ClassKindInterface,
|
|
}
|
|
|
|
link := s.Find("a").First()
|
|
iface.Name = strings.TrimSpace(link.Text())
|
|
|
|
if iface.Name == "" {
|
|
iface.Name = strings.TrimSpace(s.Find(".member-name").Text())
|
|
}
|
|
|
|
if href, exists := link.Attr("href"); exists {
|
|
iface.DocURL = resolveURL(docURL, href)
|
|
iface.QualifiedName = pkgName + "." + iface.Name
|
|
}
|
|
|
|
iface.Doc = strings.TrimSpace(s.Find(".member-summary, td:last-child").Text())
|
|
|
|
if iface.Name != "" {
|
|
interfaces = append(interfaces, iface)
|
|
}
|
|
})
|
|
|
|
return interfaces
|
|
}
|
|
|
|
func (p *Parser) extractEnums(doc *goquery.Document, pkgName string, docURL string) []*Enum {
|
|
var enums []*Enum
|
|
|
|
doc.Find("table.enum-summary tr, .enum-summary .member").Each(func(_ int, s *goquery.Selection) {
|
|
enum := &Enum{
|
|
Package: pkgName,
|
|
}
|
|
|
|
link := s.Find("a").First()
|
|
enum.Name = strings.TrimSpace(link.Text())
|
|
|
|
if enum.Name == "" {
|
|
enum.Name = strings.TrimSpace(s.Find(".member-name").Text())
|
|
}
|
|
|
|
if href, exists := link.Attr("href"); exists {
|
|
enum.DocURL = resolveURL(docURL, href)
|
|
enum.QualifiedName = pkgName + "." + enum.Name
|
|
}
|
|
|
|
enum.Doc = strings.TrimSpace(s.Find(".member-summary, td:last-child").Text())
|
|
|
|
if enum.Name != "" {
|
|
enums = append(enums, enum)
|
|
}
|
|
})
|
|
|
|
return enums
|
|
}
|
|
|
|
func (p *Parser) extractExceptions(doc *goquery.Document, pkgName string, docURL string) []*Class {
|
|
var exceptions []*Class
|
|
|
|
doc.Find("table.exception-summary tr, .exception-summary .member").Each(func(_ int, s *goquery.Selection) {
|
|
exc := &Class{
|
|
Package: pkgName,
|
|
Kind: ClassKindClass,
|
|
}
|
|
|
|
link := s.Find("a").First()
|
|
exc.Name = strings.TrimSpace(link.Text())
|
|
|
|
if exc.Name == "" {
|
|
exc.Name = strings.TrimSpace(s.Find(".member-name").Text())
|
|
}
|
|
|
|
if href, exists := link.Attr("href"); exists {
|
|
exc.DocURL = resolveURL(docURL, href)
|
|
exc.QualifiedName = pkgName + "." + exc.Name
|
|
}
|
|
|
|
exc.Doc = strings.TrimSpace(s.Find(".member-summary, td:last-child").Text())
|
|
|
|
if exc.Name != "" {
|
|
exceptions = append(exceptions, exc)
|
|
}
|
|
})
|
|
|
|
return exceptions
|
|
}
|
|
|
|
func (p *Parser) ParseClassPage(html string, docURL string) (*Class, error) {
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
class := &Class{
|
|
DocURL: docURL,
|
|
}
|
|
|
|
header := doc.Find(".header, h1, .class-name").First()
|
|
class.Name = strings.TrimSpace(header.Text())
|
|
|
|
class.QualifiedName = class.Name
|
|
if idx := strings.LastIndex(class.Name, "."); idx > 0 {
|
|
class.Package = class.Name[:idx]
|
|
class.Name = class.Name[idx+1:]
|
|
}
|
|
|
|
class.Doc = strings.TrimSpace(doc.Find(".block, .description, .class-description").First().Text())
|
|
|
|
class.Methods = p.extractMethods(doc, class.Name, docURL)
|
|
class.Fields = p.extractFields(doc, class.Name, docURL)
|
|
class.Constructors = p.extractConstructors(doc, class.Name, docURL)
|
|
|
|
return class, nil
|
|
}
|
|
|
|
func (p *Parser) extractMethods(doc *goquery.Document, className string, docURL string) []*Method {
|
|
var methods []*Method
|
|
|
|
doc.Find("table.method-summary tr, .method-summary .member, section.method-detail > ul > li").Each(func(_ int, s *goquery.Selection) {
|
|
method := &Method{
|
|
IsConstructor: false,
|
|
}
|
|
|
|
link := s.Find("a").First()
|
|
method.Name = strings.TrimSpace(link.Text())
|
|
|
|
if method.Name == "" {
|
|
sig := s.Find(".member-signature, code").Text()
|
|
method.Name = extractMethodName(sig)
|
|
}
|
|
|
|
sigEl := s.Find(".member-signature, code, .sig")
|
|
method.Signature = strings.TrimSpace(sigEl.Text())
|
|
|
|
if id, exists := s.Attr("id"); exists {
|
|
method.DocURL = docURL + "#" + id
|
|
method.QualifiedName = className + "." + method.Name
|
|
} else if href, exists := link.Attr("href"); exists {
|
|
method.DocURL = resolveURL(docURL, href)
|
|
method.QualifiedName = className + "." + method.Name
|
|
}
|
|
|
|
method.Doc = strings.TrimSpace(s.Find(".block, .member-summary, dd").First().Text())
|
|
|
|
if method.Name != "" {
|
|
methods = append(methods, method)
|
|
}
|
|
})
|
|
|
|
return methods
|
|
}
|
|
|
|
func (p *Parser) extractFields(doc *goquery.Document, className string, docURL string) []*Field {
|
|
var fields []*Field
|
|
|
|
doc.Find("table.field-summary tr, .field-summary .member").Each(func(_ int, s *goquery.Selection) {
|
|
field := &Field{}
|
|
|
|
link := s.Find("a").First()
|
|
field.Name = strings.TrimSpace(link.Text())
|
|
|
|
if field.Name == "" {
|
|
field.Name = strings.TrimSpace(s.Find(".member-name, td:first-child").Text())
|
|
}
|
|
|
|
field.Type = strings.TrimSpace(s.Find(".member-type, td:nth-child(2)").Text())
|
|
field.Doc = strings.TrimSpace(s.Find(".member-summary, td:last-child").Text())
|
|
|
|
if id, exists := s.Attr("id"); exists {
|
|
field.DocURL = docURL + "#" + id
|
|
}
|
|
|
|
if field.Name != "" {
|
|
fields = append(fields, field)
|
|
}
|
|
})
|
|
|
|
return fields
|
|
}
|
|
|
|
func (p *Parser) extractConstructors(doc *goquery.Document, className string, docURL string) []*Method {
|
|
var constructors []*Method
|
|
|
|
doc.Find("table.constructor-summary tr, .constructor-summary .member").Each(func(_ int, s *goquery.Selection) {
|
|
ctor := &Method{
|
|
IsConstructor: true,
|
|
Name: className,
|
|
}
|
|
|
|
link := s.Find("a").First()
|
|
if name := strings.TrimSpace(link.Text()); name != "" {
|
|
ctor.Name = name
|
|
}
|
|
|
|
sigEl := s.Find(".member-signature, code")
|
|
ctor.Signature = strings.TrimSpace(sigEl.Text())
|
|
|
|
ctor.Doc = strings.TrimSpace(s.Find(".block, .member-summary, td:last-child").Text())
|
|
|
|
if id, exists := s.Attr("id"); exists {
|
|
ctor.DocURL = docURL + "#" + id
|
|
}
|
|
|
|
constructors = append(constructors, ctor)
|
|
})
|
|
|
|
return constructors
|
|
}
|
|
|
|
// extractMethodName returns the method name from a signature such as
// "public void foo(int x)": the last whitespace-separated word before the "("
// that opens the parameter list.
//
// A "(" that directly follows an annotation name (a word starting with "@",
// e.g. `@Deprecated(since="9")`) opens annotation arguments rather than the
// parameter list, so that parenthesised group is skipped and the search
// continues after it. Parentheses inside annotation string literals are not
// interpreted specially.
//
// It returns "" when the signature has no "(", when nothing precedes the "(",
// or when an annotation's argument list is never closed.
func extractMethodName(sig string) string {
	sig = strings.TrimSpace(sig)

	for start := 0; start < len(sig); {
		rel := strings.IndexByte(sig[start:], '(')
		if rel < 0 {
			return ""
		}
		open := start + rel

		words := strings.Fields(sig[start:open])
		if len(words) == 0 {
			return ""
		}

		candidate := words[len(words)-1]
		if !strings.HasPrefix(candidate, "@") {
			return candidate
		}

		// candidate is an annotation; find the ")" that balances its "(" so
		// the scan can resume after the annotation's arguments.
		depth, end := 0, -1
	scan:
		for i := open; i < len(sig); i++ {
			switch sig[i] {
			case '(':
				depth++
			case ')':
				depth--
				if depth == 0 {
					end = i
					break scan
				}
			}
		}
		if end < 0 {
			return ""
		}
		start = end + 1
	}

	return ""
}
|
|
|
|
func resolveURL(base string, href string) string {
|
|
return parserutil.ResolveURL(base, href)
|
|
}
|