This commit is contained in:
Tomas Dvorak
2026-02-24 10:33:59 +01:00
parent 409acd2e08
commit 898a3c303f
1374 changed files with 290409 additions and 29187 deletions
+102
View File
@@ -0,0 +1,102 @@
package scraper
import (
"context"
"os"
"path/filepath"
"strings"
"testing"
"time"
)
func TestLocalScraperScrapeDirectory(t *testing.T) {
tmp := t.TempDir()
if err := os.WriteFile(filepath.Join(tmp, "README.md"), []byte("# Demo\n\nhello docs"), 0o644); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(tmp, "notes.txt"), []byte("notes"), 0o644); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(tmp, "bin.bin"), []byte{0x00, 0x01}, 0o644); err != nil {
t.Fatal(err)
}
s := NewLocalScraper(&Config{Timeout: 2 * time.Second, UserAgent: "DevourTest"})
docs, err := s.Scrape(context.Background(), &Source{Name: "local", Type: SourceTypeLocal, Path: tmp})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(docs) < 2 {
t.Fatalf("expected at least 2 docs, got %d", len(docs))
}
}
// TestLocalScraperDetectChanges exercises DetectChanges on a directory that
// holds a single file.
//
// It first calls DetectChanges with an empty previous hash and expects a
// reported change together with a non-empty hash. It then rewrites the file,
// calls DetectChanges again with the first hash, and expects both a reported
// change and a hash that differs from the first one.
func TestLocalScraperDetectChanges(t *testing.T) {
	tmp := t.TempDir()
	file := filepath.Join(tmp, "README.md")
	if err := os.WriteFile(file, []byte("v1"), 0o644); err != nil {
		t.Fatal(err)
	}

	s := NewLocalScraper(&Config{Timeout: 2 * time.Second, UserAgent: "DevourTest"})
	src := &Source{Name: "local", Type: SourceTypeLocal, Path: tmp}
	ctx := context.Background()

	changed, hash1, err := s.DetectChanges(ctx, src, "")
	if err != nil {
		t.Fatal(err)
	}
	if !changed || hash1 == "" {
		t.Fatalf("expected first detect to change with non-empty hash, changed=%v hash=%q", changed, hash1)
	}

	if err := os.WriteFile(file, []byte("v2"), 0o644); err != nil {
		t.Fatal(err)
	}
	// The new content has the same length as the old one. In case change
	// detection takes modification times into account (not visible from this
	// test), set an mtime that is unambiguously different instead of sleeping
	// and hoping the filesystem's timestamp granularity is fine enough.
	bumped := time.Now().Add(time.Minute)
	if err := os.Chtimes(file, bumped, bumped); err != nil {
		t.Fatal(err)
	}

	changed, hash2, err := s.DetectChanges(ctx, src, hash1)
	if err != nil {
		t.Fatal(err)
	}
	if !changed {
		t.Fatal("expected change after file update")
	}
	if hash1 == hash2 {
		t.Fatal("expected hash to change")
	}
}
// TestLocalScraper_StripsFrontmatterAndMDXPreamble writes an .mdx file that
// starts with a frontmatter block followed by import/export lines, converts it
// with fileToDocument, and checks the resulting document content: the
// frontmatter "slug" line and the import line must be absent, while the
// documentation body text must still be present.
func TestLocalScraper_StripsFrontmatterAndMDXPreamble(t *testing.T) {
	// Fixture: frontmatter, then MDX import/export lines, then the Markdown.
	const mdx = `---
title: My Doc
slug: /my-doc
---
import { Component } from "x"
export const meta = {}
# Heading
Actual documentation body.
`

	dir := t.TempDir()
	target := filepath.Join(dir, "doc.mdx")
	if err := os.WriteFile(target, []byte(mdx), 0o644); err != nil {
		t.Fatal(err)
	}

	scraper := NewLocalScraper(&Config{Timeout: 2 * time.Second, UserAgent: "DevourTest"})
	source := &Source{Name: "local", Type: SourceTypeLocal, Path: dir}

	doc, err := scraper.fileToDocument(target, source)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// Cases are evaluated top to bottom, so the first violated expectation is
	// the one that gets reported.
	body := doc.Content
	switch {
	case strings.Contains(body, "slug: /my-doc"):
		t.Fatalf("expected frontmatter to be stripped, got: %q", body)
	case strings.Contains(body, "import { Component }"):
		t.Fatalf("expected MDX preamble to be stripped, got: %q", body)
	case !strings.Contains(body, "Actual documentation body."):
		t.Fatalf("expected markdown body in content, got: %q", body)
	}
}