mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-04 04:23:02 +00:00
update
This commit is contained in:
@@ -1,11 +1,11 @@
|
||||
package cloudflaredocs
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/yourorg/devour/pkg/parserutil"
|
||||
)
|
||||
|
||||
type Parser struct {
|
||||
@@ -110,16 +110,24 @@ func (p *Parser) extractContent(doc *goquery.Document) string {
|
||||
func (p *Parser) extractSections(doc *goquery.Document, docURL string) []*Section {
|
||||
var sections []*Section
|
||||
|
||||
doc.Find("h1, h2, h3").Each(func(_ int, s *goquery.Selection) {
|
||||
doc.Find("h1, h2, h3, h4").Each(func(_ int, s *goquery.Selection) {
|
||||
section := &Section{}
|
||||
|
||||
section.Title = strings.TrimSpace(s.Text())
|
||||
section.ID = strings.TrimSpace(s.AttrOr("id", ""))
|
||||
|
||||
if id, exists := s.Attr("id"); exists {
|
||||
section.ID = id
|
||||
section.DocURL = docURL + "#" + id
|
||||
} else {
|
||||
section.DocURL = docURL
|
||||
if section.ID == "" {
|
||||
section.ID = strings.TrimSpace(s.AttrOr("data-anchor", ""))
|
||||
}
|
||||
if section.ID == "" {
|
||||
if href, exists := s.Find("a[href^='#']").First().Attr("href"); exists {
|
||||
section.ID = strings.TrimPrefix(strings.TrimSpace(href), "#")
|
||||
}
|
||||
}
|
||||
|
||||
section.DocURL = docURL
|
||||
if section.ID != "" {
|
||||
section.DocURL = docURL + "#" + section.ID
|
||||
}
|
||||
|
||||
if section.Title != "" {
|
||||
@@ -189,19 +197,5 @@ func (p *Parser) extractAPIs(doc *goquery.Document, docURL string) []*API {
|
||||
}
|
||||
|
||||
func resolveURL(base string, href string) string {
|
||||
if strings.HasPrefix(href, "http") {
|
||||
return href
|
||||
}
|
||||
|
||||
baseURL, err := url.Parse(base)
|
||||
if err != nil {
|
||||
return href
|
||||
}
|
||||
|
||||
hrefURL, err := url.Parse(href)
|
||||
if err != nil {
|
||||
return href
|
||||
}
|
||||
|
||||
return baseURL.ResolveReference(hrefURL).String()
|
||||
return parserutil.ResolveURL(base, href)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
package cloudflaredocs
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// testCloudflarePageHTML is the HTML fixture passed to ParsePage in
// TestParsePage. It contains a <title>, a meta description, a
// "product-name" element, h1 and h2 headings with ids, and one
// language-http code block holding a GET endpoint.
const testCloudflarePageHTML = `
<!DOCTYPE html>
<html>
<head>
<title>Workers API | Cloudflare Docs</title>
<meta name="description" content="Build and deploy serverless apps.">
</head>
<body>
<div class="product-name">Workers</div>
<main>
<h1 id="workers-api">Workers API</h1>
<p>Cloudflare Workers lets you run JavaScript at the edge.</p>
<h2 id="endpoints">Endpoints</h2>
<pre><code class="language-http">GET /client/v4/accounts/{id}/workers/scripts</code></pre>
</main>
</body>
</html>
`
|
||||
|
||||
func TestParsePage(t *testing.T) {
|
||||
parser := NewParser()
|
||||
|
||||
page, err := parser.ParsePage(testCloudflarePageHTML, "https://developers.cloudflare.com/workers/api/")
|
||||
if err != nil {
|
||||
t.Fatalf("ParsePage failed: %v", err)
|
||||
}
|
||||
|
||||
if page.Title != "Workers API" {
|
||||
t.Fatalf("unexpected title: %q", page.Title)
|
||||
}
|
||||
if page.Product != "Workers" {
|
||||
t.Fatalf("unexpected product: %q", page.Product)
|
||||
}
|
||||
if page.Description == "" {
|
||||
t.Fatal("expected non-empty description")
|
||||
}
|
||||
if len(page.Sections) < 2 {
|
||||
t.Fatalf("expected at least 2 sections, got %d", len(page.Sections))
|
||||
}
|
||||
if len(page.CodeBlocks) == 0 {
|
||||
t.Fatal("expected at least one code block")
|
||||
}
|
||||
if len(page.APIs) == 0 {
|
||||
t.Fatal("expected at least one parsed API endpoint")
|
||||
}
|
||||
if page.APIs[0].Method != "GET" {
|
||||
t.Fatalf("expected API method GET, got %q", page.APIs[0].Method)
|
||||
}
|
||||
if !strings.HasPrefix(page.APIs[0].Endpoint, "/client/v4/") {
|
||||
t.Fatalf("unexpected endpoint: %q", page.APIs[0].Endpoint)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSidebar(t *testing.T) {
|
||||
parser := NewParser()
|
||||
|
||||
html := `
|
||||
<div class="sidebar">
|
||||
<a href="/workers/">Workers</a>
|
||||
<a href="/dns/">DNS</a>
|
||||
</div>`
|
||||
|
||||
sections, err := parser.ParseSidebar(html)
|
||||
if err != nil {
|
||||
t.Fatalf("ParseSidebar failed: %v", err)
|
||||
}
|
||||
|
||||
if len(sections) != 2 {
|
||||
t.Fatalf("expected 2 sections, got %d", len(sections))
|
||||
}
|
||||
if sections[0].DocURL != "https://developers.cloudflare.com/workers/" {
|
||||
t.Fatalf("unexpected resolved URL: %q", sections[0].DocURL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveURL(t *testing.T) {
|
||||
got := resolveURL("https://developers.cloudflare.com", "/workers/")
|
||||
if got != "https://developers.cloudflare.com/workers/" {
|
||||
t.Fatalf("resolveURL returned %q", got)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user