This commit is contained in:
Tomas Dvorak
2026-02-24 10:33:59 +01:00
parent 409acd2e08
commit 898a3c303f
1374 changed files with 290409 additions and 29187 deletions
+16 -22
View File
@@ -1,11 +1,11 @@
package cloudflaredocs
import (
"net/url"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/yourorg/devour/pkg/parserutil"
)
type Parser struct {
@@ -110,16 +110,24 @@ func (p *Parser) extractContent(doc *goquery.Document) string {
func (p *Parser) extractSections(doc *goquery.Document, docURL string) []*Section {
var sections []*Section
doc.Find("h1, h2, h3").Each(func(_ int, s *goquery.Selection) {
doc.Find("h1, h2, h3, h4").Each(func(_ int, s *goquery.Selection) {
section := &Section{}
section.Title = strings.TrimSpace(s.Text())
section.ID = strings.TrimSpace(s.AttrOr("id", ""))
if id, exists := s.Attr("id"); exists {
section.ID = id
section.DocURL = docURL + "#" + id
} else {
section.DocURL = docURL
if section.ID == "" {
section.ID = strings.TrimSpace(s.AttrOr("data-anchor", ""))
}
if section.ID == "" {
if href, exists := s.Find("a[href^='#']").First().Attr("href"); exists {
section.ID = strings.TrimPrefix(strings.TrimSpace(href), "#")
}
}
section.DocURL = docURL
if section.ID != "" {
section.DocURL = docURL + "#" + section.ID
}
if section.Title != "" {
@@ -189,19 +197,5 @@ func (p *Parser) extractAPIs(doc *goquery.Document, docURL string) []*API {
}
// resolveURL turns href into an absolute URL string using base as the
// reference point.
//
// Any href that starts with "http" is returned unchanged. Otherwise both base
// and href are parsed with url.Parse and href is resolved against base. If
// either parse fails, href is returned as given; no error is reported to the
// caller.
//
// NOTE(review): as written, this body contains two consecutive return
// statements at the end. The parserutil.ResolveURL call can never execute
// because the statement before it always returns. This looks like the old
// hand-rolled implementation and its replacement shown together — confirm
// which one is intended and drop the other.
func resolveURL(base string, href string) string {
// Prefix check only: anything beginning with "http" is treated as already
// absolute and passed through untouched.
if strings.HasPrefix(href, "http") {
return href
}
baseURL, err := url.Parse(base)
if err != nil {
// Unparseable base: fall back to the raw href.
return href
}
hrefURL, err := url.Parse(href)
if err != nil {
// Unparseable href: hand it back unchanged.
return href
}
return baseURL.ResolveReference(hrefURL).String()
// Unreachable: the return above always fires first (see NOTE(review) above).
return parserutil.ResolveURL(base, href)
}