// Package springdocs extracts modules, classes, configuration properties,
// guides, and search results from Spring documentation HTML pages.
package springdocs

import (
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/yourorg/devour/pkg/parserutil"
)

// Parser converts Spring documentation HTML into the package's data types.
type Parser struct {
	// baseURL is the base against which search-result links are resolved.
	baseURL string
}

// NewParser returns a Parser whose base URL is https://docs.spring.io.
func NewParser() *Parser {
	return &Parser{baseURL: "https://docs.spring.io"}
}

// ParseModulePage parses the HTML of a module page and returns a Module
// populated with its name, lead documentation, version, classes,
// configuration properties, and guides. docURL is stored on the module and
// used as the base for resolving links found on the page. FetchedAt is set to
// the time of the call. An error is returned only if the HTML cannot be read.
func (p *Parser) ParseModulePage(html string, docURL string) (*Module, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, err
	}

	module := &Module{
		DocURL:    docURL,
		FetchedAt: time.Now(),
	}
	module.Name = p.extractModuleName(doc)
	module.Doc = p.extractModuleDoc(doc)
	module.Version = p.extractVersion(doc)
	module.Classes = p.extractClasses(doc, module.Name, docURL)
	module.Properties = p.extractProperties(doc, docURL)
	module.Guides = p.extractGuides(doc, docURL)
	return module, nil
}

// ParseSearchResults parses a search-results page and returns one
// SearchResult per hit container. Links are resolved against the parser's
// base URL, and Kind is derived from the resolved URL: "class" for URLs
// containing "/api/", "guide" for "/guides/" or "/tutorial/", and "doc"
// otherwise. Containers that yield neither a name nor a URL are skipped.
func (p *Parser) ParseSearchResults(html string) ([]*SearchResult, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, err
	}

	var results []*SearchResult
	doc.Find(".search-result, .ais-Hits-item, article").Each(func(_ int, s *goquery.Selection) {
		link := s.Find("a").First()
		result := &SearchResult{
			Name: strings.TrimSpace(link.Text()),
			Doc:  firstMatchText(s, ".summary, .description, p"),
		}
		if href, ok := link.Attr("href"); ok {
			result.DocURL = resolveURL(p.baseURL, href)
		}

		// The selector is broad (it includes every <article>), so drop
		// containers that carry nothing identifying a result.
		if result.Name == "" && result.DocURL == "" {
			return
		}

		switch {
		case strings.Contains(result.DocURL, "/api/"):
			result.Kind = "class"
		case strings.Contains(result.DocURL, "/guides/"),
			strings.Contains(result.DocURL, "/tutorial/"):
			result.Kind = "guide"
		default:
			result.Kind = "doc"
		}
		results = append(results, result)
	})
	return results, nil
}

// extractModuleName returns the first space-delimited word of the page's
// first heading/title element, or "" when no title text is present.
func (p *Parser) extractModuleName(doc *goquery.Document) string {
	title := strings.TrimSpace(doc.Find("h1, .title, .page-title").First().Text())
	if idx := strings.Index(title, " "); idx > 0 {
		return title[:idx]
	}
	return title
}

// extractModuleDoc returns the trimmed text of the first lead paragraph found
// on the page.
func (p *Parser) extractModuleDoc(doc *goquery.Document) string {
	lead := doc.Find(".paragraph:first-child p, .lead, #content p:first-of-type").First()
	return strings.TrimSpace(lead.Text())
}

// extractVersion returns the documentation version from the first version
// element on the page. A non-empty data-version attribute is preferred over
// the element text, because the attribute may sit on a container whose text
// is unrelated page content.
func (p *Parser) extractVersion(doc *goquery.Document) string {
	el := doc.Find(".version, .doc-version, [data-version]").First()
	if attr, ok := el.Attr("data-version"); ok {
		if v := strings.TrimSpace(attr); v != "" {
			return v
		}
	}
	return strings.TrimSpace(el.Text())
}

// extractClasses collects the classes listed on a module page. Each entry
// takes its name and URL from the first link of the matched element (or from
// the element itself when it contains no link); entries without a name are
// skipped. QualifiedName is filled only for hrefs containing "/api/".
func (p *Parser) extractClasses(doc *goquery.Document, moduleName string, docURL string) []*Class {
	var classes []*Class
	doc.Find("table.table tbody tr, .api-list a, .class-link").Each(func(_ int, s *goquery.Selection) {
		link := s.Find("a").First()
		if link.Length() == 0 {
			// The matched element is itself the link.
			link = s
		}

		name := strings.TrimSpace(link.Text())
		if name == "" {
			return
		}

		class := &Class{
			Name: name,
			Doc:  firstMatchText(s, ".description, td:last-child"),
		}
		if href, ok := link.Attr("href"); ok {
			class.DocURL = resolveURL(docURL, href)
			if strings.Contains(href, "/api/") {
				class.QualifiedName = extractQualifiedName(href)
			}
		}
		classes = append(classes, class)
	})
	return classes
}

// extractProperties collects configuration properties from the page. Entries
// without a name are skipped. For <dt> entries the description is taken from
// the immediately following <dd> sibling when none is found inside the
// element.
func (p *Parser) extractProperties(doc *goquery.Document, docURL string) []*Property {
	var properties []*Property
	doc.Find(".configuration-property, table.properties tbody tr, .config-props dt").Each(func(_ int, s *goquery.Selection) {
		name := firstMatchText(s, ".property-name, code, strong, td:first-child")
		if name == "" {
			return
		}

		prop := &Property{
			Name:    name,
			Type:    firstMatchText(s, ".property-type, .type"),
			Default: firstMatchText(s, ".default-value, .default"),
			Doc:     firstMatchText(s, ".description, dd, td:last-child"),
		}
		// Find only searches descendants, and a <dd> is a sibling of its
		// <dt>, so look at the next sibling for definition-list entries.
		if prop.Doc == "" && s.Is("dt") {
			prop.Doc = strings.TrimSpace(s.NextFiltered("dd").Text())
		}
		properties = append(properties, prop)
	})
	return properties
}

// extractGuides collects guide/tutorial links from the page. Each entry takes
// its title and URL from the first link of the matched element (or from the
// element itself when it contains no link); entries without a title are
// skipped.
func (p *Parser) extractGuides(doc *goquery.Document, docURL string) []*Guide {
	var guides []*Guide
	doc.Find(".guide-link, .tutorial-link, .guide-card").Each(func(_ int, s *goquery.Selection) {
		link := s.Find("a").First()
		if link.Length() == 0 {
			link = s
		}

		title := strings.TrimSpace(link.Text())
		if title == "" {
			return
		}

		guide := &Guide{
			Title:       title,
			Description: firstMatchText(s, ".summary, .description"),
		}
		if href, ok := link.Attr("href"); ok {
			guide.DocURL = resolveURL(docURL, href)
		}
		guides = append(guides, guide)
	})
	return guides
}

// ParseClassPage parses the HTML of a class page and returns a Class with its
// name, documentation, methods, fields, and constructors. QualifiedName is
// set to the same text as Name. An error is returned only if the HTML cannot
// be read.
func (p *Parser) ParseClassPage(html string, docURL string) (*Class, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, err
	}

	class := &Class{DocURL: docURL}
	class.Name = strings.TrimSpace(doc.Find("h1, .title, .class-name").First().Text())
	class.QualifiedName = class.Name
	class.Doc = strings.TrimSpace(doc.Find(".class-description, .javadoc, .class-comment").First().Text())
	class.Methods = p.extractClassMethods(doc, class.Name, docURL)
	class.Fields = p.extractClassFields(doc, class.Name, docURL)
	class.Constructors = p.extractClassConstructors(doc, class.Name, docURL)
	return class, nil
}

// extractClassMethods collects the methods listed on a class page. The name
// comes from the entry's first link, falling back to the identifier before
// "(" in its signature; entries with no name are skipped. QualifiedName is
// className + "." + name for every returned method. Fragment-only hrefs are
// appended to docURL; other hrefs are resolved against it.
func (p *Parser) extractClassMethods(doc *goquery.Document, className string, docURL string) []*Method {
	var methods []*Method
	doc.Find("table.method-summary tbody tr, .method, .member").Each(func(_ int, s *goquery.Selection) {
		link := s.Find("a").First()
		signature := firstMatchText(s, ".method-signature, code")

		name := strings.TrimSpace(link.Text())
		if name == "" {
			name = extractSpringMethodName(signature)
		}
		if name == "" {
			return
		}

		method := &Method{
			IsConstructor: false,
			Name:          name,
			Signature:     signature,
			Doc:           firstMatchText(s, ".method-description, td:last-child, dd"),
			// The qualified name does not depend on the entry having a link.
			QualifiedName: className + "." + name,
		}
		if href, ok := link.Attr("href"); ok {
			if strings.HasPrefix(href, "#") {
				method.DocURL = docURL + href
			} else {
				method.DocURL = resolveURL(docURL, href)
			}
		}
		methods = append(methods, method)
	})
	return methods
}

// extractClassFields collects the fields listed on a class page; entries
// without a name are skipped.
func (p *Parser) extractClassFields(doc *goquery.Document, className string, docURL string) []*Field {
	var fields []*Field
	doc.Find("table.field-summary tbody tr, .field").Each(func(_ int, s *goquery.Selection) {
		name := firstMatchText(s, ".field-name, a, td:first-child")
		if name == "" {
			return
		}
		fields = append(fields, &Field{
			Name: name,
			Type: firstMatchText(s, ".field-type, td:nth-child(2)"),
			Doc:  firstMatchText(s, ".field-description, td:last-child"),
		})
	})
	return fields
}

// extractClassConstructors collects the constructors listed on a class page.
// Every matched entry is returned as a Method named after className with
// IsConstructor set.
func (p *Parser) extractClassConstructors(doc *goquery.Document, className string, docURL string) []*Method {
	var constructors []*Method
	doc.Find("table.constructor-summary tbody tr, .constructor").Each(func(_ int, s *goquery.Selection) {
		constructors = append(constructors, &Method{
			IsConstructor: true,
			Name:          className,
			Signature:     firstMatchText(s, ".constructor-signature, code"),
			Doc:           firstMatchText(s, ".constructor-description, td:last-child"),
		})
	})
	return constructors
}

// firstMatchText returns the trimmed text of the first element under s that
// matches selector, or "" when nothing matches. Restricting to the first
// match matters for union selectors: Text on a multi-element selection
// concatenates the text of every match, which duplicates content when one
// matched element contains another.
func firstMatchText(s *goquery.Selection, selector string) string {
	return strings.TrimSpace(s.Find(selector).First().Text())
}

// extractSpringMethodName returns the last whitespace-separated token that
// precedes the first "(" in sig, or "" when sig has no "(" after its first
// character or nothing precedes it.
func extractSpringMethodName(sig string) string {
	sig = strings.TrimSpace(sig)
	idx := strings.Index(sig, "(")
	if idx <= 0 {
		return ""
	}
	tokens := strings.Fields(sig[:idx])
	if len(tokens) == 0 {
		return ""
	}
	return tokens[len(tokens)-1]
}

// extractQualifiedName returns the last slash-separated segment of href after
// removing one trailing slash, or "" when href contains no other slash.
//
// NOTE(review): the result is a single path segment and keeps any file
// extension (e.g. ".html"); confirm whether callers expect a dotted package
// name instead.
func extractQualifiedName(href string) string {
	segments := strings.Split(strings.TrimSuffix(href, "/"), "/")
	if len(segments) < 2 {
		return ""
	}
	return segments[len(segments)-1]
}

// resolveURL resolves href against base by delegating to
// parserutil.ResolveURL.
func resolveURL(base string, href string) string {
	return parserutil.ResolveURL(base, href)
}