mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-03 20:13:03 +00:00
update
This commit is contained in:
@@ -1,11 +1,11 @@
|
||||
package cloudflaredocs
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/yourorg/devour/pkg/parserutil"
|
||||
)
|
||||
|
||||
type Parser struct {
|
||||
@@ -110,16 +110,24 @@ func (p *Parser) extractContent(doc *goquery.Document) string {
|
||||
func (p *Parser) extractSections(doc *goquery.Document, docURL string) []*Section {
|
||||
var sections []*Section
|
||||
|
||||
doc.Find("h1, h2, h3").Each(func(_ int, s *goquery.Selection) {
|
||||
doc.Find("h1, h2, h3, h4").Each(func(_ int, s *goquery.Selection) {
|
||||
section := &Section{}
|
||||
|
||||
section.Title = strings.TrimSpace(s.Text())
|
||||
section.ID = strings.TrimSpace(s.AttrOr("id", ""))
|
||||
|
||||
if id, exists := s.Attr("id"); exists {
|
||||
section.ID = id
|
||||
section.DocURL = docURL + "#" + id
|
||||
} else {
|
||||
section.DocURL = docURL
|
||||
if section.ID == "" {
|
||||
section.ID = strings.TrimSpace(s.AttrOr("data-anchor", ""))
|
||||
}
|
||||
if section.ID == "" {
|
||||
if href, exists := s.Find("a[href^='#']").First().Attr("href"); exists {
|
||||
section.ID = strings.TrimPrefix(strings.TrimSpace(href), "#")
|
||||
}
|
||||
}
|
||||
|
||||
section.DocURL = docURL
|
||||
if section.ID != "" {
|
||||
section.DocURL = docURL + "#" + section.ID
|
||||
}
|
||||
|
||||
if section.Title != "" {
|
||||
@@ -189,19 +197,5 @@ func (p *Parser) extractAPIs(doc *goquery.Document, docURL string) []*API {
|
||||
}
|
||||
|
||||
func resolveURL(base string, href string) string {
|
||||
if strings.HasPrefix(href, "http") {
|
||||
return href
|
||||
}
|
||||
|
||||
baseURL, err := url.Parse(base)
|
||||
if err != nil {
|
||||
return href
|
||||
}
|
||||
|
||||
hrefURL, err := url.Parse(href)
|
||||
if err != nil {
|
||||
return href
|
||||
}
|
||||
|
||||
return baseURL.ResolveReference(hrefURL).String()
|
||||
return parserutil.ResolveURL(base, href)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user