mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-03 20:13:03 +00:00
update
This commit is contained in:
+1012
File diff suppressed because it is too large
Load Diff
+144
@@ -0,0 +1,144 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/yourorg/devour/internal/scraper"
|
||||
)
|
||||
|
||||
func TestDeriveSearchTerms(t *testing.T) {
|
||||
terms := deriveSearchTerms("go", "how to regex match http path")
|
||||
|
||||
if len(terms) == 0 {
|
||||
t.Fatal("expected at least one derived search term")
|
||||
}
|
||||
|
||||
joined := strings.Join(terms, ",")
|
||||
if !strings.Contains(joined, "regexp") {
|
||||
t.Fatalf("expected regexp term in %v", terms)
|
||||
}
|
||||
if !strings.Contains(joined, "net/http") {
|
||||
t.Fatalf("expected net/http term in %v", terms)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScoreDocument(t *testing.T) {
|
||||
query := "regex match in go"
|
||||
docTitleMatch := &scraper.Document{
|
||||
Title: "Package regexp",
|
||||
Content: "Use MustCompile and MatchString to match values.",
|
||||
Type: "go-package",
|
||||
URL: "https://pkg.go.dev/regexp",
|
||||
}
|
||||
docNoMatch := &scraper.Document{
|
||||
Title: "Package archive/tar",
|
||||
Content: "Read and write tar archives.",
|
||||
Type: "go-package",
|
||||
URL: "https://pkg.go.dev/archive/tar",
|
||||
}
|
||||
|
||||
if scoreDocument(query, docTitleMatch) <= scoreDocument(query, docNoMatch) {
|
||||
t.Fatal("expected regex-related document to have a higher score")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractRecommendedAPI(t *testing.T) {
|
||||
docs := []rankedDoc{
|
||||
{
|
||||
doc: &scraper.Document{
|
||||
Title: "regexp.func MustCompile ¶",
|
||||
URL: "https://pkg.go.dev/regexp",
|
||||
Content: "re := regexp.MustCompile(`\\\\d+`)\nif re.MatchString(input) { fmt.Println(\"ok\") }",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
apis := extractRecommendedAPI(docs)
|
||||
if len(apis) == 0 {
|
||||
t.Fatal("expected API extraction to return at least one call")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractSnippet(t *testing.T) {
|
||||
content := "The regexp package implements regular expression search. Use MustCompile for fixed patterns."
|
||||
snippet := extractSnippet(content, []string{"regexp"})
|
||||
if snippet == "" {
|
||||
t.Fatal("expected non-empty snippet")
|
||||
}
|
||||
if !strings.Contains(strings.ToLower(snippet), "regexp") {
|
||||
t.Fatalf("snippet should mention regexp, got: %q", snippet)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCandidateDocURLs_FrameworkFallbacks(t *testing.T) {
|
||||
next, err := candidateDocURLs("nextjs", "routing")
|
||||
if err != nil {
|
||||
t.Fatalf("candidateDocURLs(nextjs) error: %v", err)
|
||||
}
|
||||
if len(next) < 2 {
|
||||
t.Fatalf("expected fallback URLs for nextjs, got %v", next)
|
||||
}
|
||||
if next[0] != "https://nextjs.org/docs/app/building-your-application/routing" {
|
||||
t.Fatalf("unexpected primary nextjs URL: %q", next[0])
|
||||
}
|
||||
|
||||
remix, err := candidateDocURLs("remix", "routes")
|
||||
if err != nil {
|
||||
t.Fatalf("candidateDocURLs(remix) error: %v", err)
|
||||
}
|
||||
if len(remix) == 0 || remix[0] != "https://v2.remix.run/docs/file-conventions/routes" {
|
||||
t.Fatalf("unexpected remix candidate URLs: %v", remix)
|
||||
}
|
||||
|
||||
solid, err := candidateDocURLs("solid", "router")
|
||||
if err != nil {
|
||||
t.Fatalf("candidateDocURLs(solid) error: %v", err)
|
||||
}
|
||||
if len(solid) == 0 || !strings.Contains(solid[0], "github.com/solidjs/solid-docs") {
|
||||
t.Fatalf("unexpected solid candidate URLs: %v", solid)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrimaryQueryTokenSkipsQuestionWords(t *testing.T) {
|
||||
token := primaryQueryToken("what does routing do in remix")
|
||||
if token == "" {
|
||||
t.Fatal("expected non-empty token")
|
||||
}
|
||||
if token == "what" || token == "does" {
|
||||
t.Fatalf("expected informative token, got %q", token)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeriveSearchTermsSolidRouting(t *testing.T) {
|
||||
terms := deriveSearchTerms("solid", "how to do routing in solid")
|
||||
joined := strings.Join(terms, ",")
|
||||
if !strings.Contains(joined, "solid-router") {
|
||||
t.Fatalf("expected solid-router term in %v", terms)
|
||||
}
|
||||
if strings.Contains(joined, "signals") {
|
||||
t.Fatalf("did not expect signals default for routing question, got %v", terms)
|
||||
}
|
||||
}
|
||||
|
||||
func TestShouldFallbackToLive(t *testing.T) {
|
||||
strong := []rankedDoc{
|
||||
{
|
||||
doc: &scraper.Document{Title: "Routing Guide", Content: "routing with file based routes", URL: "https://nextjs.org/docs/routing"},
|
||||
score: 2.2,
|
||||
},
|
||||
}
|
||||
if shouldFallbackToLive(strong, []string{"routing"}) {
|
||||
t.Fatal("expected strong local match to skip live fallback")
|
||||
}
|
||||
|
||||
weak := []rankedDoc{
|
||||
{
|
||||
doc: &scraper.Document{Title: "Misc", Content: "unrelated", URL: "https://example.com"},
|
||||
score: 0.1,
|
||||
},
|
||||
}
|
||||
if !shouldFallbackToLive(weak, []string{"routing"}) {
|
||||
t.Fatal("expected weak local match to trigger live fallback")
|
||||
}
|
||||
}
|
||||
+181
@@ -0,0 +1,181 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/exec"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// Flag values for the auto command; bound to --dry-run, --json and --lang in init.
var (
	// autoDryRun makes runAuto print the selected command without executing it.
	autoDryRun bool
	// autoJSON makes runAuto emit the route decision as JSON.
	autoJSON bool
	// autoLang optionally overrides language inference for ask/get routes.
	autoLang string
)
|
||||
|
||||
// autoCmd is the "auto <intent>" subcommand. It requires at least one
// positional argument and delegates to runAuto.
var autoCmd = &cobra.Command{
|
||||
Use: "auto <intent>",
|
||||
Short: "Route natural-language intent to the best Devour command",
|
||||
Long: `Auto-classify intent and run the best matching command (get/scrape/ask/quality).
|
||||
|
||||
Examples:
|
||||
devour auto "how to parse json in go"
|
||||
devour auto "https://pkg.go.dev/net/http"
|
||||
devour auto "check code quality" --dry-run
|
||||
devour auto "what is useEffect" --lang react`,
|
||||
Args: cobra.MinimumNArgs(1),
|
||||
RunE: runAuto,
|
||||
}
|
||||
|
||||
func init() {
|
||||
autoCmd.Flags().BoolVar(&autoDryRun, "dry-run", false, "print selected command without executing")
|
||||
autoCmd.Flags().BoolVar(&autoJSON, "json", false, "output route decision as JSON")
|
||||
autoCmd.Flags().StringVar(&autoLang, "lang", "", "optional language override for ask/get routes")
|
||||
}
|
||||
|
||||
// autoDecision is the outcome of classifying an intent: which route was
// chosen, why, and the argument vector to pass to the devour executable.
type autoDecision struct {
	// Intent is the intent text exactly as it was passed to classifyIntent.
	Intent string `json:"intent"`
	// Route names the selected route: "scrape", "quality", "ask" or "get".
	Route string `json:"route"`
	// Reason is a short human-readable explanation of the choice.
	Reason string `json:"reason"`
	// Command holds the subcommand and its arguments, without the program name.
	Command []string `json:"command"`
}
|
||||
|
||||
func runAuto(cmd *cobra.Command, args []string) error {
|
||||
intent := strings.TrimSpace(strings.Join(args, " "))
|
||||
if intent == "" {
|
||||
return fmt.Errorf("intent is required")
|
||||
}
|
||||
|
||||
decision, err := classifyIntent(intent, strings.TrimSpace(autoLang))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if autoJSON {
|
||||
enc := json.NewEncoder(cmd.OutOrStdout())
|
||||
enc.SetIndent("", " ")
|
||||
return enc.Encode(decision)
|
||||
}
|
||||
|
||||
fmt.Printf("Route: %s\n", decision.Route)
|
||||
fmt.Printf("Reason: %s\n", decision.Reason)
|
||||
fmt.Printf("Command: devour %s\n", strings.Join(decision.Command, " "))
|
||||
|
||||
if autoDryRun {
|
||||
return nil
|
||||
}
|
||||
|
||||
exe, err := os.Executable()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
run := exec.Command(exe, decision.Command...)
|
||||
run.Stdout = cmd.OutOrStdout()
|
||||
run.Stderr = cmd.ErrOrStderr()
|
||||
return run.Run()
|
||||
}
|
||||
|
||||
func classifyIntent(intent, langOverride string) (*autoDecision, error) {
|
||||
lower := strings.ToLower(intent)
|
||||
trimmed := strings.TrimSpace(intent)
|
||||
|
||||
if u, err := url.Parse(trimmed); err == nil && (u.Scheme == "http" || u.Scheme == "https") {
|
||||
route := []string{"scrape", trimmed}
|
||||
return &autoDecision{Intent: intent, Route: "scrape", Reason: "detected URL input", Command: route}, nil
|
||||
}
|
||||
|
||||
if strings.Contains(lower, "quality") || strings.Contains(lower, "technical debt") || strings.Contains(lower, "lint") || strings.Contains(lower, "code smell") {
|
||||
route := []string{"quality", "status"}
|
||||
if strings.Contains(lower, "scan") {
|
||||
route = []string{"quality", "scan", "."}
|
||||
}
|
||||
return &autoDecision{Intent: intent, Route: "quality", Reason: "detected quality-analysis intent", Command: route}, nil
|
||||
}
|
||||
|
||||
language := strings.TrimSpace(langOverride)
|
||||
if language == "" {
|
||||
language = inferLanguageFromText(lower)
|
||||
}
|
||||
if language != "" {
|
||||
if canonical, ok := normalizeLanguage(language); ok {
|
||||
language = canonical
|
||||
} else {
|
||||
language = ""
|
||||
}
|
||||
}
|
||||
|
||||
if strings.Contains(lower, "?") || strings.Contains(lower, "how") || strings.Contains(lower, "why") || strings.Contains(lower, "what") {
|
||||
if language == "" {
|
||||
language = "go"
|
||||
}
|
||||
route := []string{"ask", "--lang", language, intent, "--format", "text"}
|
||||
return &autoDecision{Intent: intent, Route: "ask", Reason: "question-style intent", Command: route}, nil
|
||||
}
|
||||
|
||||
if language == "" {
|
||||
language = "go"
|
||||
}
|
||||
keyword := inferKeyword(intent)
|
||||
if canonical, ok := normalizeLanguage(keyword); ok && canonical == language {
|
||||
keyword = "overview"
|
||||
}
|
||||
route := []string{"get", language, keyword}
|
||||
return &autoDecision{Intent: intent, Route: "get", Reason: "default docs retrieval route", Command: route}, nil
|
||||
}
|
||||
|
||||
func inferLanguageFromText(text string) string {
|
||||
text = strings.ToLower(text)
|
||||
if strings.Contains(text, "c#") {
|
||||
return "csharp"
|
||||
}
|
||||
if strings.Contains(text, "next.js") {
|
||||
return "nextjs"
|
||||
}
|
||||
|
||||
tokens := strings.FieldsFunc(text, func(r rune) bool {
|
||||
return !(unicode.IsLetter(r) || unicode.IsDigit(r))
|
||||
})
|
||||
tokenSet := make(map[string]bool, len(tokens))
|
||||
for _, tok := range tokens {
|
||||
if tok != "" {
|
||||
tokenSet[tok] = true
|
||||
}
|
||||
}
|
||||
|
||||
aliases := make([]string, 0, len(languageAliases()))
|
||||
for alias := range languageAliases() {
|
||||
aliases = append(aliases, alias)
|
||||
}
|
||||
sort.Slice(aliases, func(i, j int) bool {
|
||||
return len(aliases[i]) > len(aliases[j])
|
||||
})
|
||||
|
||||
for _, alias := range aliases {
|
||||
if tokenSet[alias] {
|
||||
return alias
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// inferKeyword returns the first informative word of intent, lower-cased, for
// use as the keyword of a "get" route. It returns "overview" when no such word
// exists.
//
// Words are whitespace-separated. Surrounding punctuation, quotes and brackets
// are stripped from each word, so `"hooks"` or `(hooks)` yields `hooks`;
// characters inside a word (as in "c#" or "net/http") are preserved. Filler
// words and words shorter than two bytes are skipped.
func inferKeyword(intent string) string {
	// Characters removed from both ends of a word.
	const trimSet = ",.!?;:\"'`()[]{}<>"

	for _, w := range strings.Fields(strings.ToLower(intent)) {
		w = strings.Trim(w, trimSet)
		if len(w) < 2 {
			continue
		}
		switch w {
		case "get", "docs", "documentation", "about", "for", "on",
			"the", "a", "an", "show", "me", "please":
			// Filler words carry no topic information.
			continue
		}
		return w
	}
	return "overview"
}
|
||||
@@ -0,0 +1,31 @@
|
||||
package cmd
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestInferLanguageFromText_UsesTokenBoundaries(t *testing.T) {
|
||||
if got := inferLanguageFromText("get nextjs docs"); got != "nextjs" {
|
||||
t.Fatalf("inferLanguageFromText matched %q, want %q", got, "nextjs")
|
||||
}
|
||||
if got := inferLanguageFromText("read docs for architecture"); got != "" {
|
||||
t.Fatalf("inferLanguageFromText should not infer language from plain docs text, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClassifyIntent_GetRouteKeywordFallback(t *testing.T) {
|
||||
decision, err := classifyIntent("get nextjs docs", "")
|
||||
if err != nil {
|
||||
t.Fatalf("classifyIntent returned error: %v", err)
|
||||
}
|
||||
if decision.Route != "get" {
|
||||
t.Fatalf("expected get route, got %q", decision.Route)
|
||||
}
|
||||
if len(decision.Command) != 3 {
|
||||
t.Fatalf("expected 3 command args, got %v", decision.Command)
|
||||
}
|
||||
if decision.Command[1] != "nextjs" {
|
||||
t.Fatalf("expected language nextjs, got %q", decision.Command[1])
|
||||
}
|
||||
if decision.Command[2] != "overview" {
|
||||
t.Fatalf("expected keyword overview, got %q", decision.Command[2])
|
||||
}
|
||||
}
|
||||
+133
-45
@@ -14,6 +14,7 @@ import argparse
|
||||
class ModernBannerGenerator:
|
||||
def __init__(self, data):
|
||||
self.data = data
|
||||
self.fonts = self._init_fonts()
|
||||
|
||||
# Devour brand colors - consistent with Go theme
|
||||
self.colors = {
|
||||
@@ -56,6 +57,49 @@ class ModernBannerGenerator:
|
||||
'severity_t3': (251, 146, 60), # #fb923c - bright orange
|
||||
'severity_t4': (248, 113, 113), # #f87171 - bright red
|
||||
}
|
||||
|
||||
def _init_fonts(self):
|
||||
"""Initialize font candidates and cache."""
|
||||
# Prefer widely-available fonts on Linux/macOS/Windows.
|
||||
font_candidates = {
|
||||
"regular": [
|
||||
"arial.ttf",
|
||||
"/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
|
||||
"/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
|
||||
"/System/Library/Fonts/Supplemental/Arial.ttf",
|
||||
"/Library/Fonts/Arial.ttf",
|
||||
],
|
||||
"bold": [
|
||||
"arialbd.ttf",
|
||||
"/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
|
||||
"/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
|
||||
"/System/Library/Fonts/Supplemental/Arial Bold.ttf",
|
||||
"/Library/Fonts/Arial Bold.ttf",
|
||||
],
|
||||
}
|
||||
|
||||
return {
|
||||
"candidates": font_candidates,
|
||||
"cache": {},
|
||||
}
|
||||
|
||||
def get_font(self, size, weight="regular"):
|
||||
"""Get a cached font or fall back to the default."""
|
||||
key = (size, weight)
|
||||
if key in self.fonts["cache"]:
|
||||
return self.fonts["cache"][key]
|
||||
|
||||
for path in self.fonts["candidates"].get(weight, []):
|
||||
try:
|
||||
font = ImageFont.truetype(path, size)
|
||||
self.fonts["cache"][key] = font
|
||||
return font
|
||||
except:
|
||||
continue
|
||||
|
||||
font = ImageFont.load_default()
|
||||
self.fonts["cache"][key] = font
|
||||
return font
|
||||
|
||||
def get_score_color(self, score, muted=False):
|
||||
if score >= 90:
|
||||
@@ -89,6 +133,22 @@ class ModernBannerGenerator:
|
||||
|
||||
for x in range(width):
|
||||
img.putpixel((x, y), (r, g, b))
|
||||
|
||||
# Add subtle radial glows for depth
|
||||
self.draw_glow(img, width * 0.15, height * 0.2, 220, (71, 85, 105), 40)
|
||||
self.draw_glow(img, width * 0.85, height * 0.75, 260, (251, 146, 60), 35)
|
||||
|
||||
def draw_glow(self, img, cx, cy, radius, color, max_alpha):
|
||||
"""Draw a soft radial glow."""
|
||||
draw = ImageDraw.Draw(img)
|
||||
steps = 12
|
||||
for i in range(steps):
|
||||
r = radius - (radius * i / steps)
|
||||
alpha = int(max_alpha * (1 - i / steps))
|
||||
draw.ellipse(
|
||||
[(cx - r, cy - r), (cx + r, cy + r)],
|
||||
fill=(*color, alpha),
|
||||
)
|
||||
|
||||
def draw_glass_card(self, draw, x, y, width, height, border_radius=12, use_alt=False):
|
||||
"""Draw glass morphism card with enhanced effects"""
|
||||
@@ -125,9 +185,9 @@ class ModernBannerGenerator:
|
||||
def draw_score_circle(self, draw, cx, cy, radius, score, label="OVERALL", is_primary=True):
|
||||
"""Draw enhanced circular score visualization"""
|
||||
# Background circle with subtle border
|
||||
draw.ellipse([(cx-radius-2, cy-radius-2), (cx+radius+2, cy+radius+2)],
|
||||
draw.ellipse([(cx-radius-2, cy-radius-2), (cx+radius+2, cy+radius+2)],
|
||||
fill=(*self.colors['border'], 100))
|
||||
draw.ellipse([(cx-radius, cy-radius), (cx+radius, cy+radius)],
|
||||
draw.ellipse([(cx-radius, cy-radius), (cx+radius, cy+radius)],
|
||||
fill=self.colors['card'], outline=self.colors['border'])
|
||||
|
||||
# Progress arc with enhanced styling
|
||||
@@ -136,25 +196,26 @@ class ModernBannerGenerator:
|
||||
percentage = score / 100.0
|
||||
|
||||
# Draw background arc
|
||||
draw.arc([(cx-radius+4, cy-radius+4), (cx+radius-4, cy+radius-4)],
|
||||
draw.arc([(cx-radius+4, cy-radius+4), (cx+radius-4, cy+radius-4)],
|
||||
-90, 270, fill=self.colors['border_subtle'], width=6)
|
||||
|
||||
# Draw progress arc
|
||||
start_angle = -90
|
||||
end_angle = start_angle + (360 * percentage)
|
||||
arc_width = 8 if is_primary else 6
|
||||
arc_width = 9 if is_primary else 6
|
||||
|
||||
draw.arc([(cx-radius+4, cy-radius+4), (cx+radius-4, cy+radius-4)],
|
||||
draw.arc([(cx-radius+4, cy-radius+4), (cx+radius-4, cy+radius-4)],
|
||||
start_angle, end_angle,
|
||||
fill=score_color, width=arc_width)
|
||||
|
||||
# Inner glow ring
|
||||
if is_primary:
|
||||
draw.arc([(cx-radius+10, cy-radius+10), (cx+radius-10, cy+radius-10)],
|
||||
start_angle, end_angle, fill=score_color, width=2)
|
||||
|
||||
# Enhanced typography
|
||||
try:
|
||||
font_large = ImageFont.truetype("arial.ttf", 32 if is_primary else 28)
|
||||
font_small = ImageFont.truetype("arial.ttf", 11)
|
||||
except:
|
||||
font_large = ImageFont.load_default()
|
||||
font_small = ImageFont.load_default()
|
||||
font_large = self.get_font(34 if is_primary else 28, weight="bold")
|
||||
font_small = self.get_font(11, weight="regular")
|
||||
|
||||
# Score text
|
||||
score_text = f"{int(score)}%"
|
||||
@@ -163,14 +224,14 @@ class ModernBannerGenerator:
|
||||
text_height = bbox[3] - bbox[1]
|
||||
|
||||
text_color = self.colors['text'] if is_primary else self.colors['text_muted']
|
||||
draw.text((cx - text_width//2, cy - text_height//2 - 2), score_text,
|
||||
draw.text((cx - text_width//2, cy - text_height//2 - 2), score_text,
|
||||
fill=text_color, font=font_large)
|
||||
|
||||
# Label
|
||||
label_bbox = draw.textbbox((0, 0), label, font=font_small)
|
||||
label_width = label_bbox[2] - label_bbox[0]
|
||||
|
||||
draw.text((cx - label_width//2, cy + radius + 15), label,
|
||||
draw.text((cx - label_width//2, cy + radius + 15), label,
|
||||
fill=self.colors['text_dim'], font=font_small)
|
||||
|
||||
def draw_grade_badge(self, draw, x, y, grade):
|
||||
@@ -185,14 +246,11 @@ class ModernBannerGenerator:
|
||||
6, fill=(0, 0, 0, 60))
|
||||
|
||||
# Main badge
|
||||
draw.rounded_rectangle([(x, y), (x + badge_width, y + badge_height)],
|
||||
draw.rounded_rectangle([(x, y), (x + badge_width, y + badge_height)],
|
||||
6, fill=grade_color, outline=self.colors['border'])
|
||||
|
||||
# Grade text with better typography
|
||||
try:
|
||||
font = ImageFont.truetype("arial.ttf", 18)
|
||||
except:
|
||||
font = ImageFont.load_default()
|
||||
font = self.get_font(18, weight="bold")
|
||||
|
||||
bbox = draw.textbbox((0, 0), grade, font=font)
|
||||
text_width = bbox[2] - bbox[0]
|
||||
@@ -201,15 +259,14 @@ class ModernBannerGenerator:
|
||||
draw.text((x + badge_width//2 - text_width//2, y + badge_height//2 - text_height//2 + 1),
|
||||
grade, fill=(255, 255, 255), font=font)
|
||||
|
||||
def draw_text(self, draw, text, x, y, size=14, color=None, centered=False):
|
||||
def draw_text(self, draw, text, x, y, size=14, color=None, centered=False, max_width=None, min_size=9, weight="regular"):
|
||||
"""Draw enhanced text with better typography"""
|
||||
if color is None:
|
||||
color = self.colors['text']
|
||||
|
||||
try:
|
||||
font = ImageFont.truetype("arial.ttf", size)
|
||||
except:
|
||||
font = ImageFont.load_default()
|
||||
font = self.get_font(size, weight=weight)
|
||||
if max_width is not None:
|
||||
font = self.fit_font(draw, text, font, max_width, min_size=min_size, weight=weight)
|
||||
|
||||
if centered:
|
||||
bbox = draw.textbbox((0, 0), text, font=font)
|
||||
@@ -217,16 +274,43 @@ class ModernBannerGenerator:
|
||||
x = x - text_width // 2
|
||||
|
||||
draw.text((x, y), text, fill=color, font=font)
|
||||
|
||||
def fit_font(self, draw, text, font, max_width, min_size=9, weight="regular"):
|
||||
"""Shrink font until text fits max width."""
|
||||
if font == ImageFont.load_default():
|
||||
return font
|
||||
size = font.size if hasattr(font, "size") else min_size
|
||||
current = font
|
||||
while size > min_size:
|
||||
bbox = draw.textbbox((0, 0), text, font=current)
|
||||
if (bbox[2] - bbox[0]) <= max_width:
|
||||
return current
|
||||
size -= 1
|
||||
current = self.get_font(size, weight=weight)
|
||||
return current
|
||||
|
||||
def truncate_text(self, draw, text, font, max_width):
|
||||
"""Truncate text with ellipsis to fit width."""
|
||||
if max_width <= 0:
|
||||
return ""
|
||||
if draw.textbbox((0, 0), text, font=font)[2] <= max_width:
|
||||
return text
|
||||
ellipsis = "..."
|
||||
for i in range(len(text), 0, -1):
|
||||
candidate = text[:i] + ellipsis
|
||||
if draw.textbbox((0, 0), candidate, font=font)[2] <= max_width:
|
||||
return candidate
|
||||
return ellipsis
|
||||
|
||||
def draw_metric_card(self, draw, x, y, width, height, title, value, color):
|
||||
"""Draw metric card"""
|
||||
self.draw_glass_card(draw, x, y, width, height)
|
||||
|
||||
# Title
|
||||
self.draw_text(draw, title, x + 15, y + 15, size=12, color=self.colors['text_muted'])
|
||||
self.draw_text(draw, title, x + 15, y + 14, size=12, color=self.colors['text_muted'])
|
||||
|
||||
# Value
|
||||
self.draw_text(draw, value, x + 15, y + 40, size=20, color=color)
|
||||
self.draw_text(draw, value, x + 15, y + 38, size=20, color=color, weight="bold")
|
||||
|
||||
def draw_severity_bars(self, draw, x, y, width, height, find_by_tier):
|
||||
"""Draw enhanced severity bars"""
|
||||
@@ -313,18 +397,18 @@ class ModernBannerGenerator:
|
||||
# Enhanced header section
|
||||
header_y = content_y + 20
|
||||
self.draw_text(draw, "DEVOUR SCORE", content_x + content_width//2, header_y,
|
||||
size=20, color=self.colors['text'], centered=True)
|
||||
size=20, color=self.colors['text'], centered=True, weight="bold")
|
||||
|
||||
# Project info
|
||||
project_name = self.data['project_name']
|
||||
version_text = f"v{self.data['version']}" if self.data['version'] else "latest"
|
||||
project_text = f"{project_name} {version_text}"
|
||||
self.draw_text(draw, project_text, content_x + content_width//2, header_y + 25,
|
||||
size=14, color=self.colors['text_muted'], centered=True)
|
||||
size=14, color=self.colors['text_muted'], centered=True, max_width=content_width - 120)
|
||||
|
||||
# Timestamp
|
||||
time_text = self.data.get('timestamp', 'Today')
|
||||
self.draw_text(draw, time_text, content_x + content_width//2,
|
||||
self.draw_text(draw, time_text, content_x + content_width//2,
|
||||
content_y + content_height - 25,
|
||||
size=11, color=self.colors['text_dim'], centered=True)
|
||||
|
||||
@@ -347,19 +431,19 @@ class ModernBannerGenerator:
|
||||
|
||||
# Total findings
|
||||
self.draw_text(draw, str(findings_total), col_x + col_width//2, metrics_y,
|
||||
size=18, color=self.colors['text'], centered=True)
|
||||
size=18, color=self.colors['text'], centered=True, weight="bold")
|
||||
self.draw_text(draw, "TOTAL", col_x + col_width//2, metrics_y + 22,
|
||||
size=10, color=self.colors['text_muted'], centered=True)
|
||||
|
||||
# Open findings
|
||||
self.draw_text(draw, str(findings_open), col_x + col_width + col_width//2, metrics_y,
|
||||
size=18, color=self.colors['orange'], centered=True)
|
||||
size=18, color=self.colors['orange'], centered=True, weight="bold")
|
||||
self.draw_text(draw, "OPEN", col_x + col_width + col_width//2, metrics_y + 22,
|
||||
size=10, color=self.colors['text_muted'], centered=True)
|
||||
|
||||
# Resolved findings
|
||||
self.draw_text(draw, str(findings_closed), col_x + 2*col_width + col_width//2, metrics_y,
|
||||
size=18, color=self.colors['score_a'], centered=True)
|
||||
size=18, color=self.colors['score_a'], centered=True, weight="bold")
|
||||
self.draw_text(draw, "RESOLVED", col_x + 2*col_width + col_width//2, metrics_y + 22,
|
||||
size=10, color=self.colors['text_muted'], centered=True)
|
||||
|
||||
@@ -379,7 +463,7 @@ class ModernBannerGenerator:
|
||||
# Header section
|
||||
header_y = 30
|
||||
self.draw_text(draw, f"{self.data['project_name']} Quality Report",
|
||||
width//2, header_y, size=28, color=self.colors['text'], centered=True)
|
||||
width//2, header_y, size=28, color=self.colors['text'], centered=True, weight="bold", max_width=width - 80)
|
||||
|
||||
version_text = f"v{self.data['version']}" if self.data['version'] else "latest"
|
||||
self.draw_text(draw, version_text, width//2, header_y + 35,
|
||||
@@ -399,8 +483,8 @@ class ModernBannerGenerator:
|
||||
# Score details
|
||||
score_details_y = score_y + 100
|
||||
self.draw_text(draw, f"Overall: {int(self.data['overall_score'])}%",
|
||||
score_x, score_details_y, size=20,
|
||||
color=self.get_score_color(self.data['overall_score']), centered=True)
|
||||
score_x, score_details_y, size=20,
|
||||
color=self.get_score_color(self.data['overall_score']), centered=True, weight="bold")
|
||||
self.draw_text(draw, f"Strict: {int(self.data['strict_score'])}%",
|
||||
score_x, score_details_y + 25, size=16,
|
||||
color=self.get_score_color(self.data['strict_score'], muted=True), centered=True)
|
||||
@@ -419,7 +503,7 @@ class ModernBannerGenerator:
|
||||
|
||||
# Column 1 Header
|
||||
self.draw_text(draw, "Score Breakdown", col1_x + col_width//2, grid_start_y + 20,
|
||||
size=18, color=self.colors['text'], centered=True)
|
||||
size=18, color=self.colors['text'], centered=True, weight="bold")
|
||||
|
||||
# Column 1 Data
|
||||
score_data = [
|
||||
@@ -439,7 +523,7 @@ class ModernBannerGenerator:
|
||||
|
||||
# Value
|
||||
self.draw_text(draw, value, col1_x + col_width//2, data_y + 35,
|
||||
size=24, color=color, centered=True)
|
||||
size=24, color=color, centered=True, weight="bold")
|
||||
|
||||
data_y += 80
|
||||
|
||||
@@ -449,15 +533,19 @@ class ModernBannerGenerator:
|
||||
|
||||
# Column 2 Header
|
||||
self.draw_text(draw, "Findings by Type", col2_x + col_width//2, grid_start_y + 20,
|
||||
size=18, color=self.colors['text'], centered=True)
|
||||
size=18, color=self.colors['text'], centered=True, weight="bold")
|
||||
|
||||
# Column 2 Data - Top finding types
|
||||
type_data_y = grid_start_y + 60
|
||||
type_items = list(self.data['find_by_type'].items())[:6] # Top 6 types
|
||||
max_type_count = max(self.data['find_by_type'].values()) if self.data['find_by_type'] else 1
|
||||
|
||||
if not type_items:
|
||||
self.draw_text(draw, "No findings", col2_x + col_width//2, grid_start_y + 110,
|
||||
size=14, color=self.colors['text_dim'], centered=True)
|
||||
for issue_type, count in type_items:
|
||||
# Type bar
|
||||
bar_width = int((col_width - 40) * (count / max(self.data['find_by_type'].values())))
|
||||
bar_width = int((col_width - 40) * (count / max_type_count))
|
||||
bar_height = 22
|
||||
|
||||
# Bar background
|
||||
@@ -469,9 +557,9 @@ class ModernBannerGenerator:
|
||||
4, fill=self.colors['orange'])
|
||||
|
||||
# Type label
|
||||
label_text = f"{issue_type}"
|
||||
if len(label_text) > 20:
|
||||
label_text = label_text[:17] + "..."
|
||||
label_text = f"{issue_type}".replace("_", " ")
|
||||
font_label = self.get_font(11, weight="regular")
|
||||
label_text = self.truncate_text(draw, label_text, font_label, col_width - 90)
|
||||
self.draw_text(draw, label_text, col2_x + 25, type_data_y + 2,
|
||||
size=11, color=self.colors['text_muted'])
|
||||
|
||||
@@ -487,7 +575,7 @@ class ModernBannerGenerator:
|
||||
|
||||
# Column 3 Header
|
||||
self.draw_text(draw, "Issues by Severity", col3_x + col_width//2, grid_start_y + 20,
|
||||
size=18, color=self.colors['text'], centered=True)
|
||||
size=18, color=self.colors['text'], centered=True, weight="bold")
|
||||
|
||||
# Column 3 Data - Severity breakdown
|
||||
severity_data_y = grid_start_y + 60
|
||||
@@ -510,11 +598,11 @@ class ModernBannerGenerator:
|
||||
|
||||
# Severity name
|
||||
self.draw_text(draw, severity_name, col3_x + 50, severity_data_y + 15,
|
||||
size=14, color=self.colors['text'])
|
||||
size=14, color=self.colors['text'], max_width=col_width - 70)
|
||||
|
||||
# Count
|
||||
self.draw_text(draw, f"{count} issues", col3_x + 50, severity_data_y + 35,
|
||||
size=16, color=color)
|
||||
size=16, color=color, weight="bold")
|
||||
|
||||
severity_data_y += 70
|
||||
|
||||
@@ -539,7 +627,7 @@ class ModernBannerGenerator:
|
||||
|
||||
# Value
|
||||
self.draw_text(draw, value, metric_x + metrics_width//2, summary_y + 10,
|
||||
size=18, color=color, centered=True)
|
||||
size=18, color=color, centered=True, weight="bold")
|
||||
|
||||
# Label
|
||||
self.draw_text(draw, label, metric_x + metrics_width//2, summary_y + 30,
|
||||
|
||||
@@ -24,7 +24,6 @@ This command will:
|
||||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(demoCmd)
|
||||
}
|
||||
|
||||
func runDemo(cmd *cobra.Command, args []string) error {
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"version": "1",
|
||||
"built_at": "2026-02-23T11:19:21.65415175+01:00",
|
||||
"docs": []
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"version": "1",
|
||||
"built_at": "2026-02-23T11:19:21.65415175+01:00",
|
||||
"docs_dir": "./devour_data/docs",
|
||||
"source_file_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
|
||||
"doc_count": 0
|
||||
}
|
||||
@@ -1,3 +1,5 @@
|
||||
//go:build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
@@ -5,7 +7,6 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/yourorg/devour/internal/quality"
|
||||
"github.com/yourorg/devour/internal/quality/scorecard"
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -80,14 +81,14 @@ func main() {
|
||||
},
|
||||
LastScan: time.Now(),
|
||||
Scorecard: &quality.Scorecard{
|
||||
TotalScore: 72,
|
||||
StrictScore: 68,
|
||||
TotalScore: 72,
|
||||
StrictScore: 68,
|
||||
FindingsByType: map[string]int{
|
||||
"complexity": 1,
|
||||
"naming": 1,
|
||||
"duplication": 1,
|
||||
"security": 1,
|
||||
"unused_import": 1,
|
||||
"complexity": 1,
|
||||
"naming": 1,
|
||||
"duplication": 1,
|
||||
"security": 1,
|
||||
"unused_import": 1,
|
||||
},
|
||||
FindingsByTier: map[quality.Severity]int{
|
||||
quality.SeverityT1: 1,
|
||||
|
||||
+226
-58
@@ -2,6 +2,7 @@ package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
@@ -11,112 +12,210 @@ var getCmd = &cobra.Command{
|
||||
Use: "get <language> <keyword>",
|
||||
Short: "Get documentation for a language/framework",
|
||||
Long: `Quickly fetch documentation for popular languages and frameworks.
|
||||
This command automatically maps language+keyword combinations to their official documentation sites.
|
||||
|
||||
Supported languages:
|
||||
go, golang - Go documentation (pkg.go.dev)
|
||||
rust - Rust documentation (docs.rs)
|
||||
python, py - Python documentation (docs.python.org)
|
||||
java - Java documentation (docs.oracle.com)
|
||||
spring - Spring Boot documentation (docs.spring.io)
|
||||
typescript, ts - TypeScript documentation (typescriptlang.org)
|
||||
react - React documentation (react.dev)
|
||||
vue - Vue.js documentation (vuejs.org)
|
||||
nuxt - Nuxt documentation (nuxt.com)
|
||||
docker - Docker documentation (docs.docker.com)
|
||||
cloudflare, cf - Cloudflare documentation (developers.cloudflare.com)
|
||||
astro - Astro documentation (docs.astro.build)
|
||||
This command maps language+keyword combinations to official documentation sources.
|
||||
|
||||
Examples:
|
||||
devour get go http # Go HTTP package documentation
|
||||
devour get python asyncio # Python asyncio module
|
||||
devour get react hooks # React Hooks documentation
|
||||
devour get docker compose # Docker Compose docs
|
||||
devour get rust tokio # Rust Tokio crate`,
|
||||
devour get go http
|
||||
devour get python asyncio
|
||||
devour get react hooks
|
||||
devour get nextjs routing
|
||||
devour get express middleware`,
|
||||
Args: cobra.ExactArgs(2),
|
||||
RunE: runGet,
|
||||
}
|
||||
|
||||
func init() {
|
||||
// Add flags that can override defaults
|
||||
getCmd.Flags().StringVarP(&scrapeFormat, "format", "f", "json", "output format (json, markdown)")
|
||||
getCmd.Flags().StringVarP(&scrapeOutput, "output", "o", "", "output directory (default: devour_data/docs)")
|
||||
getCmd.Flags().StringVarP(&scrapeOutput, "output", "o", "", "output directory (default: configured docs dir)")
|
||||
getCmd.Flags().IntVar(&scrapeConcurrency, "concurrency", 10, "parallel scraping workers")
|
||||
}
|
||||
|
||||
func runGet(cmd *cobra.Command, args []string) error {
|
||||
language := strings.ToLower(args[0])
|
||||
keyword := strings.ToLower(args[1])
|
||||
langIn := strings.ToLower(strings.TrimSpace(args[0]))
|
||||
keyword := strings.TrimSpace(args[1])
|
||||
if keyword == "" {
|
||||
return fmt.Errorf("keyword is required")
|
||||
}
|
||||
|
||||
language, ok := normalizeLanguage(langIn)
|
||||
if !ok {
|
||||
return fmt.Errorf("unsupported language: %s. Supported: %s", langIn, strings.Join(supportedLanguages(), ", "))
|
||||
}
|
||||
|
||||
// Map language to base URL and construct full URL
|
||||
url, err := constructDocURL(language, keyword)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Set the scrape type based on language
|
||||
sourceType := mapLanguageToType(language)
|
||||
|
||||
// Reuse the existing scrape logic with pre-determined values
|
||||
scrapeType = string(sourceType)
|
||||
sourceURL := url
|
||||
scrapeType = sourceType
|
||||
|
||||
fmt.Printf("Getting docs for: %s %s\n", language, keyword)
|
||||
fmt.Printf("URL: %s\n", sourceURL)
|
||||
fmt.Printf("Type: %s\n", sourceType)
|
||||
fmt.Println()
|
||||
fmt.Printf("URL: %s\n", url)
|
||||
fmt.Printf("Type: %s\n\n", sourceType)
|
||||
|
||||
// Call the existing scrape logic
|
||||
return runScrape(cmd, []string{sourceURL})
|
||||
return runScrape(cmd, []string{url})
|
||||
}
|
||||
|
||||
func constructDocURL(language, keyword string) (string, error) {
|
||||
language = strings.ToLower(strings.TrimSpace(language))
|
||||
keyword = strings.TrimSpace(keyword)
|
||||
lowerKeyword := strings.ToLower(keyword)
|
||||
|
||||
switch language {
|
||||
case "go", "golang":
|
||||
return fmt.Sprintf("https://pkg.go.dev/%s", keyword), nil
|
||||
case "go":
|
||||
return fmt.Sprintf("https://pkg.go.dev/%s", lowerKeyword), nil
|
||||
case "rust":
|
||||
return fmt.Sprintf("https://docs.rs/%s/latest/%s/", keyword, keyword), nil
|
||||
case "python", "py":
|
||||
if keyword == "stdlib" || keyword == "standard" {
|
||||
return fmt.Sprintf("https://docs.rs/%s/latest/%s/", lowerKeyword, lowerKeyword), nil
|
||||
case "python":
|
||||
if lowerKeyword == "stdlib" || lowerKeyword == "standard" {
|
||||
return "https://docs.python.org/3/library/", nil
|
||||
}
|
||||
return fmt.Sprintf("https://docs.python.org/3/library/%s.html", keyword), nil
|
||||
return fmt.Sprintf("https://docs.python.org/3/library/%s.html", lowerKeyword), nil
|
||||
case "java":
|
||||
return fmt.Sprintf("https://docs.oracle.com/javase/8/docs/api/%s.html", keyword), nil
|
||||
return fmt.Sprintf("https://docs.oracle.com/javase/8/docs/api/%s.html", lowerKeyword), nil
|
||||
case "spring":
|
||||
return fmt.Sprintf("https://docs.spring.io/spring-boot/docs/current/reference/htmlsingle/#%s", keyword), nil
|
||||
case "typescript", "ts":
|
||||
return fmt.Sprintf("https://www.typescriptlang.org/docs/handbook/%s.html", keyword), nil
|
||||
if lowerKeyword == "mcp" || lowerKeyword == "mcp-overview" {
|
||||
return "https://docs.spring.io/spring-ai/reference/api/mcp/mcp-overview.html", nil
|
||||
}
|
||||
return fmt.Sprintf("https://docs.spring.io/spring-boot/docs/current/reference/htmlsingle/#%s", lowerKeyword), nil
|
||||
case "typescript":
|
||||
return fmt.Sprintf("https://www.typescriptlang.org/docs/handbook/%s.html", lowerKeyword), nil
|
||||
case "react":
|
||||
return fmt.Sprintf("https://react.dev/reference/react/%s", keyword), nil
|
||||
if lowerKeyword == "hooks" {
|
||||
return "https://react.dev/reference/react", nil
|
||||
}
|
||||
return fmt.Sprintf("https://react.dev/reference/react/%s", lowerKeyword), nil
|
||||
case "vue":
|
||||
return fmt.Sprintf("https://vuejs.org/guide/%s.html", keyword), nil
|
||||
if strings.Contains(lowerKeyword, "api") {
|
||||
return "https://vuejs.org/api/", nil
|
||||
}
|
||||
return fmt.Sprintf("https://vuejs.org/guide/%s.html", lowerKeyword), nil
|
||||
case "nuxt":
|
||||
return fmt.Sprintf("https://nuxt.com/docs/guide/%s", keyword), nil
|
||||
return fmt.Sprintf("https://nuxt.com/docs/guide/%s", lowerKeyword), nil
|
||||
case "docker":
|
||||
return fmt.Sprintf("https://docs.docker.com/%s", keyword), nil
|
||||
case "cloudflare", "cf":
|
||||
return fmt.Sprintf("https://developers.cloudflare.com/%s", keyword), nil
|
||||
return fmt.Sprintf("https://docs.docker.com/%s", lowerKeyword), nil
|
||||
case "cloudflare":
|
||||
return fmt.Sprintf("https://developers.cloudflare.com/%s", lowerKeyword), nil
|
||||
case "astro":
|
||||
return fmt.Sprintf("https://docs.astro.build/en/guides/%s", keyword), nil
|
||||
path := lowerKeyword
|
||||
switch lowerKeyword {
|
||||
case "components":
|
||||
path = "basics/astro-components"
|
||||
case "api":
|
||||
path = "reference/api-reference"
|
||||
case "install", "setup", "getting-started":
|
||||
path = "install-and-setup"
|
||||
default:
|
||||
if !strings.Contains(lowerKeyword, "/") {
|
||||
path = "guides/" + lowerKeyword
|
||||
}
|
||||
}
|
||||
return fmt.Sprintf("https://docs.astro.build/en/%s/", path), nil
|
||||
case "csharp":
|
||||
lowerKeyword = strings.TrimPrefix(lowerKeyword, "/")
|
||||
if strings.Contains(lowerKeyword, "regex") || strings.Contains(lowerKeyword, "regular-expression") {
|
||||
return "https://learn.microsoft.com/en-us/dotnet/standard/base-types/regular-expressions", nil
|
||||
}
|
||||
return fmt.Sprintf("https://learn.microsoft.com/en-us/dotnet/csharp/language-reference/%s", lowerKeyword), nil
|
||||
case "kotlin":
|
||||
lowerKeyword = strings.TrimPrefix(lowerKeyword, "/")
|
||||
if lowerKeyword == "regex" || lowerKeyword == "regexp" {
|
||||
lowerKeyword = "strings"
|
||||
}
|
||||
if strings.HasSuffix(lowerKeyword, ".html") {
|
||||
return fmt.Sprintf("https://kotlinlang.org/docs/%s", lowerKeyword), nil
|
||||
}
|
||||
return fmt.Sprintf("https://kotlinlang.org/docs/%s.html", lowerKeyword), nil
|
||||
case "php":
|
||||
lowerKeyword = strings.TrimPrefix(lowerKeyword, "/")
|
||||
if strings.HasSuffix(lowerKeyword, ".php") || strings.Contains(lowerKeyword, "function.") || strings.Contains(lowerKeyword, "book.") {
|
||||
return fmt.Sprintf("https://www.php.net/manual/en/%s", lowerKeyword), nil
|
||||
}
|
||||
return fmt.Sprintf("https://www.php.net/manual/en/book.%s.php", lowerKeyword), nil
|
||||
case "ruby":
|
||||
keyword = strings.TrimPrefix(keyword, "/")
|
||||
switch strings.ToLower(keyword) {
|
||||
case "regex", "regexp":
|
||||
keyword = "Regexp"
|
||||
case "string":
|
||||
keyword = "String"
|
||||
case "array":
|
||||
keyword = "Array"
|
||||
default:
|
||||
if !strings.Contains(keyword, "::") && len(keyword) > 0 {
|
||||
keyword = strings.ToUpper(keyword[:1]) + strings.ToLower(keyword[1:])
|
||||
}
|
||||
}
|
||||
return fmt.Sprintf("https://ruby-doc.org/core/%s.html", keyword), nil
|
||||
case "elixir":
|
||||
keyword = strings.TrimPrefix(keyword, "/")
|
||||
switch strings.ToLower(keyword) {
|
||||
case "regex":
|
||||
keyword = "Regex"
|
||||
case "string":
|
||||
keyword = "String"
|
||||
case "enum":
|
||||
keyword = "Enum"
|
||||
default:
|
||||
if len(keyword) > 0 {
|
||||
keyword = strings.ToUpper(keyword[:1]) + strings.ToLower(keyword[1:])
|
||||
}
|
||||
}
|
||||
return fmt.Sprintf("https://hexdocs.pm/elixir/%s.html", keyword), nil
|
||||
case "nextjs":
|
||||
if strings.Contains(lowerKeyword, "routing") {
|
||||
return "https://nextjs.org/docs/app/building-your-application/routing", nil
|
||||
}
|
||||
if strings.Contains(lowerKeyword, "data") || strings.Contains(lowerKeyword, "fetch") {
|
||||
return "https://nextjs.org/docs/app/building-your-application/data-fetching", nil
|
||||
}
|
||||
return "https://nextjs.org/docs", nil
|
||||
case "svelte":
|
||||
if strings.Contains(lowerKeyword, "kit") {
|
||||
return "https://svelte.dev/docs/kit", nil
|
||||
}
|
||||
return "https://svelte.dev/docs/svelte/overview", nil
|
||||
case "angular":
|
||||
if strings.Contains(lowerKeyword, "http") {
|
||||
return "https://angular.dev/guide/http", nil
|
||||
}
|
||||
return "https://angular.dev/guide/components", nil
|
||||
case "remix":
|
||||
if strings.Contains(lowerKeyword, "route") {
|
||||
return "https://v2.remix.run/docs/file-conventions/routes", nil
|
||||
}
|
||||
return "https://v2.remix.run/docs", nil
|
||||
case "solid":
|
||||
// Solid docs are published from this repository and include solid-router content.
|
||||
return "https://github.com/solidjs/solid-docs", nil
|
||||
case "express":
|
||||
if strings.Contains(lowerKeyword, "routing") {
|
||||
return "https://expressjs.com/en/guide/routing.html", nil
|
||||
}
|
||||
if strings.Contains(lowerKeyword, "middleware") {
|
||||
return "https://expressjs.com/en/guide/using-middleware.html", nil
|
||||
}
|
||||
return "https://expressjs.com/en/guide/writing-middleware.html", nil
|
||||
default:
|
||||
return "", fmt.Errorf("unsupported language: %s. Supported languages: go, rust, python, java, spring, typescript, react, vue, nuxt, docker, cloudflare, astro", language)
|
||||
return "", fmt.Errorf("unsupported language: %s. Supported: %s", language, strings.Join(supportedLanguages(), ", "))
|
||||
}
|
||||
}
|
||||
|
||||
func mapLanguageToType(language string) string {
|
||||
language, _ = normalizeLanguage(language)
|
||||
switch language {
|
||||
case "go", "golang":
|
||||
case "go":
|
||||
return "godocs"
|
||||
case "rust":
|
||||
return "rustdocs"
|
||||
case "python", "py":
|
||||
case "python":
|
||||
return "pythondocs"
|
||||
case "java":
|
||||
return "javadocs"
|
||||
case "spring":
|
||||
return "springdocs"
|
||||
case "typescript", "ts":
|
||||
case "typescript":
|
||||
return "tsdocs"
|
||||
case "react":
|
||||
return "reactdocs"
|
||||
@@ -126,11 +225,80 @@ func mapLanguageToType(language string) string {
|
||||
return "nuxtdocs"
|
||||
case "docker":
|
||||
return "dockerdocs"
|
||||
case "cloudflare", "cf":
|
||||
case "cloudflare":
|
||||
return "cloudflaredocs"
|
||||
case "astro":
|
||||
return "astrodocs"
|
||||
case "csharp", "kotlin", "php", "ruby", "elixir", "nextjs", "svelte", "angular", "remix", "express":
|
||||
return "url"
|
||||
case "solid":
|
||||
return "github"
|
||||
default:
|
||||
return "web"
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// languageAliasTable is the alias lookup table used by normalizeLanguage and
// supportedLanguages. It is built once at package initialisation so that
// lookups do not allocate a fresh map on every call. Treat it as read-only;
// code that needs a map it can modify should call languageAliases instead.
var languageAliasTable = languageAliases()

// normalizeLanguage resolves a user-supplied language name or alias to its
// canonical name.
//
// The input is trimmed and lower-cased before the lookup. The second result
// is false, and the first is empty, when the input is blank or is not a known
// name or alias.
func normalizeLanguage(language string) (string, bool) {
	// A missing key (including the empty string) yields ("", false), which is
	// exactly the "unsupported" result callers expect.
	canonical, ok := languageAliasTable[strings.ToLower(strings.TrimSpace(language))]
	return canonical, ok
}

// languageAliases returns a newly allocated map from every accepted language
// name (canonical names included) to its canonical name. Each call returns an
// independent map, so callers may modify the result freely.
func languageAliases() map[string]string {
	return map[string]string{
		"go":         "go",
		"golang":     "go",
		"rust":       "rust",
		"python":     "python",
		"py":         "python",
		"java":       "java",
		"spring":     "spring",
		"typescript": "typescript",
		"ts":         "typescript",
		"react":      "react",
		"vue":        "vue",
		"nuxt":       "nuxt",
		"docker":     "docker",
		"cloudflare": "cloudflare",
		"cf":         "cloudflare",
		"astro":      "astro",
		"csharp":     "csharp",
		"cs":         "csharp",
		"kotlin":     "kotlin",
		"kt":         "kotlin",
		"php":        "php",
		"ruby":       "ruby",
		"rb":         "ruby",
		"elixir":     "elixir",
		"ex":         "elixir",
		"next":       "nextjs",
		"nextjs":     "nextjs",
		"svelte":     "svelte",
		"angular":    "angular",
		"ng":         "angular",
		"remix":      "remix",
		"solid":      "solid",
		"solidjs":    "solid",
		"express":    "express",
		"expressjs":  "express",
	}
}

// supportedLanguages returns the distinct canonical language names in
// ascending order. The result is a fresh slice on every call.
func supportedLanguages() []string {
	seen := make(map[string]struct{}, len(languageAliasTable))
	out := make([]string, 0, len(languageAliasTable))
	for _, canonical := range languageAliasTable {
		if _, dup := seen[canonical]; dup {
			continue
		}
		seen[canonical] = struct{}{}
		out = append(out, canonical)
	}
	// Map iteration order is random; sort so the output is deterministic.
	sort.Strings(out)
	return out
}
|
||||
|
||||
+121
@@ -0,0 +1,121 @@
|
||||
package cmd
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestConstructDocURL_SupportedLanguages(t *testing.T) {
|
||||
tests := []struct {
|
||||
language string
|
||||
keyword string
|
||||
wantURL string
|
||||
}{
|
||||
{"go", "net/http", "https://pkg.go.dev/net/http"},
|
||||
{"rust", "tokio", "https://docs.rs/tokio/latest/tokio/"},
|
||||
{"python", "asyncio", "https://docs.python.org/3/library/asyncio.html"},
|
||||
{"java", "java/util/list", "https://docs.oracle.com/javase/8/docs/api/java/util/list.html"},
|
||||
{"spring", "mcp", "https://docs.spring.io/spring-ai/reference/api/mcp/mcp-overview.html"},
|
||||
{"typescript", "utility-types", "https://www.typescriptlang.org/docs/handbook/utility-types.html"},
|
||||
{"react", "hooks", "https://react.dev/reference/react"},
|
||||
{"vue", "essentials/reactivity-fundamentals", "https://vuejs.org/guide/essentials/reactivity-fundamentals.html"},
|
||||
{"nuxt", "directory-structure", "https://nuxt.com/docs/guide/directory-structure"},
|
||||
{"docker", "compose", "https://docs.docker.com/compose"},
|
||||
{"cloudflare", "workers", "https://developers.cloudflare.com/workers"},
|
||||
{"astro", "components", "https://docs.astro.build/en/basics/astro-components/"},
|
||||
{"csharp", "regex", "https://learn.microsoft.com/en-us/dotnet/standard/base-types/regular-expressions"},
|
||||
{"kotlin", "regex", "https://kotlinlang.org/docs/strings.html"},
|
||||
{"php", "pcre", "https://www.php.net/manual/en/book.pcre.php"},
|
||||
{"ruby", "Regexp", "https://ruby-doc.org/core/Regexp.html"},
|
||||
{"elixir", "String", "https://hexdocs.pm/elixir/String.html"},
|
||||
{"nextjs", "routing", "https://nextjs.org/docs/app/building-your-application/routing"},
|
||||
{"svelte", "kit", "https://svelte.dev/docs/kit"},
|
||||
{"angular", "http", "https://angular.dev/guide/http"},
|
||||
{"remix", "routes", "https://v2.remix.run/docs/file-conventions/routes"},
|
||||
{"solid", "signals", "https://github.com/solidjs/solid-docs"},
|
||||
{"express", "routing", "https://expressjs.com/en/guide/routing.html"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.language+"_"+tt.keyword, func(t *testing.T) {
|
||||
got, err := constructDocURL(tt.language, tt.keyword)
|
||||
if err != nil {
|
||||
t.Fatalf("constructDocURL(%q, %q) returned error: %v", tt.language, tt.keyword, err)
|
||||
}
|
||||
if got != tt.wantURL {
|
||||
t.Fatalf("constructDocURL(%q, %q) = %q, want %q", tt.language, tt.keyword, got, tt.wantURL)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestConstructDocURL_UnsupportedLanguage(t *testing.T) {
|
||||
if _, err := constructDocURL("haskell", "regex-tdfa"); err == nil {
|
||||
t.Fatal("constructDocURL should return an error for unsupported language")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMapLanguageToType(t *testing.T) {
|
||||
tests := []struct {
|
||||
language string
|
||||
wantType string
|
||||
}{
|
||||
{"go", "godocs"},
|
||||
{"golang", "godocs"},
|
||||
{"rust", "rustdocs"},
|
||||
{"python", "pythondocs"},
|
||||
{"py", "pythondocs"},
|
||||
{"java", "javadocs"},
|
||||
{"spring", "springdocs"},
|
||||
{"typescript", "tsdocs"},
|
||||
{"ts", "tsdocs"},
|
||||
{"react", "reactdocs"},
|
||||
{"vue", "vuedocs"},
|
||||
{"nuxt", "nuxtdocs"},
|
||||
{"docker", "dockerdocs"},
|
||||
{"cloudflare", "cloudflaredocs"},
|
||||
{"cf", "cloudflaredocs"},
|
||||
{"astro", "astrodocs"},
|
||||
{"csharp", "url"},
|
||||
{"kotlin", "url"},
|
||||
{"php", "url"},
|
||||
{"ruby", "url"},
|
||||
{"elixir", "url"},
|
||||
{"nextjs", "url"},
|
||||
{"next", "url"},
|
||||
{"svelte", "url"},
|
||||
{"angular", "url"},
|
||||
{"ng", "url"},
|
||||
{"remix", "url"},
|
||||
{"solidjs", "github"},
|
||||
{"expressjs", "url"},
|
||||
{"unknown", ""},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.language, func(t *testing.T) {
|
||||
got := mapLanguageToType(tt.language)
|
||||
if got != tt.wantType {
|
||||
t.Fatalf("mapLanguageToType(%q) = %q, want %q", tt.language, got, tt.wantType)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeLanguage(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
want string
|
||||
ok bool
|
||||
}{
|
||||
{"go", "go", true},
|
||||
{"golang", "go", true},
|
||||
{"next", "nextjs", true},
|
||||
{"solidjs", "solid", true},
|
||||
{"expressjs", "express", true},
|
||||
{"unknown", "", false},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
got, ok := normalizeLanguage(tt.in)
|
||||
if got != tt.want || ok != tt.ok {
|
||||
t.Fatalf("normalizeLanguage(%q) = (%q,%v), want (%q,%v)", tt.in, got, ok, tt.want, tt.ok)
|
||||
}
|
||||
}
|
||||
}
|
||||
+5
-61
@@ -6,6 +6,7 @@ import (
|
||||
"path/filepath"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
appconfig "github.com/yourorg/devour/internal/config"
|
||||
)
|
||||
|
||||
var initCmd = &cobra.Command{
|
||||
@@ -53,7 +54,10 @@ func runInit(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
// Create default config
|
||||
config := generateDefaultConfig(initRemote)
|
||||
config, err := appconfig.RenderInitYAML(initRemote)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to render default config: %w", err)
|
||||
}
|
||||
if err := os.WriteFile(configPath, []byte(config), 0644); err != nil {
|
||||
return fmt.Errorf("failed to write config: %w", err)
|
||||
}
|
||||
@@ -82,63 +86,3 @@ func runInit(cmd *cobra.Command, args []string) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func generateDefaultConfig(remote bool) string {
|
||||
mode := "local"
|
||||
if remote {
|
||||
mode = "remote"
|
||||
}
|
||||
|
||||
return fmt.Sprintf(`# Devour Configuration
|
||||
version: 1
|
||||
|
||||
# Storage paths
|
||||
storage:
|
||||
docs_dir: ./devour_data/docs
|
||||
index_dir: ./devour_data/index
|
||||
metadata_dir: ./devour_data/metadata
|
||||
|
||||
# Embedding settings
|
||||
embeddings:
|
||||
provider: openai
|
||||
model: text-embedding-3-small
|
||||
dimensions: 1536
|
||||
api_key: ${OPENAI_API_KEY}
|
||||
batch_size: 100
|
||||
|
||||
# Vector database
|
||||
vector_db:
|
||||
type: chromem
|
||||
persist: true
|
||||
similarity_metric: cosine
|
||||
|
||||
# Scraping settings
|
||||
scraper:
|
||||
user_agent: "Devour/1.0"
|
||||
timeout: 30s
|
||||
retry_count: 3
|
||||
concurrency: 10
|
||||
rate_limit: 500ms
|
||||
max_depth: 3
|
||||
cache_dir: ./devour_data/cache
|
||||
|
||||
# Scheduler
|
||||
scheduler:
|
||||
enabled: true
|
||||
interval: 72h
|
||||
check_method: hash
|
||||
|
||||
# Server settings
|
||||
server:
|
||||
mode: %s
|
||||
port: 8080
|
||||
host: localhost
|
||||
|
||||
# Sources (add your own)
|
||||
sources: []
|
||||
# - name: example-docs
|
||||
# type: url
|
||||
# url: https://docs.example.com
|
||||
# include: ["**/*.md", "**/*.html"]
|
||||
`, mode)
|
||||
}
|
||||
|
||||
+67
-95
@@ -1,118 +1,90 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
var languagesFormat string
|
||||
|
||||
var languagesCmd = &cobra.Command{
|
||||
Use: "languages",
|
||||
Short: "Show supported languages and their mappings",
|
||||
Long: `Display all supported languages for the 'devour get' command
|
||||
along with their base URLs and examples.
|
||||
|
||||
This helps you discover what documentation sources are available
|
||||
and how to reference them quickly.`,
|
||||
Short: "Show supported languages and aliases",
|
||||
Long: `Display all supported languages for 'devour get' and 'devour ask'
|
||||
with aliases and starter examples.`,
|
||||
RunE: runLanguages,
|
||||
}
|
||||
|
||||
// init attaches languagesCmd to rootCmd and registers its --format flag,
// which is stored in languagesFormat and defaults to "text".
func init() {
|
||||
rootCmd.AddCommand(languagesCmd)
|
||||
languagesCmd.Flags().StringVar(&languagesFormat, "format", "text", "output format (text, json)")
|
||||
}
|
||||
|
||||
// languageInfo describes one supported language as listed by the languages
// command; the struct tags define its JSON representation.
type languageInfo struct {
|
||||
// Canonical is the canonical language name (for example "go").
Canonical string `json:"canonical"`
|
||||
// Aliases lists every accepted name for the language, canonical included.
Aliases []string `json:"aliases"`
|
||||
// Example is a sample command line for the language.
Example string `json:"example"`
|
||||
// Source names the documentation site or repository for the language.
Source string `json:"source"`
|
||||
}
|
||||
|
||||
func runLanguages(cmd *cobra.Command, args []string) error {
|
||||
fmt.Println("🌐 Devour Supported Languages")
|
||||
fmt.Println("═══════════════════════════════════════════════════════════════")
|
||||
fmt.Println()
|
||||
|
||||
languages := []struct {
|
||||
langs []string
|
||||
url string
|
||||
examples []string
|
||||
}{
|
||||
{
|
||||
langs: []string{"go", "golang"},
|
||||
url: "https://pkg.go.dev/{package}",
|
||||
examples: []string{"devour get go http", "devour get go fmt", "devour get golang json"},
|
||||
},
|
||||
{
|
||||
langs: []string{"rust"},
|
||||
url: "https://docs.rs/{crate}/latest/{crate}/",
|
||||
examples: []string{"devour get rust tokio", "devour get rust serde", "devour get rust clap"},
|
||||
},
|
||||
{
|
||||
langs: []string{"python", "py"},
|
||||
url: "https://docs.python.org/3/library/{module}.html",
|
||||
examples: []string{"devour get python asyncio", "devour get py requests", "devour get python stdlib"},
|
||||
},
|
||||
{
|
||||
langs: []string{"java"},
|
||||
url: "https://docs.oracle.com/javase/8/docs/api/{package}.html",
|
||||
examples: []string{"devour get java string", "devour get java arraylist"},
|
||||
},
|
||||
{
|
||||
langs: []string{"spring"},
|
||||
url: "https://docs.spring.io/spring-boot/docs/current/reference/htmlsingle/#{section}",
|
||||
examples: []string{"devour get spring boot", "devour get spring testing"},
|
||||
},
|
||||
{
|
||||
langs: []string{"typescript", "ts"},
|
||||
url: "https://www.typescriptlang.org/docs/handbook/{topic}.html",
|
||||
examples: []string{"devour get typescript interfaces", "devour get ts decorators"},
|
||||
},
|
||||
{
|
||||
langs: []string{"react"},
|
||||
url: "https://react.dev/reference/react/{feature}",
|
||||
examples: []string{"devour get react hooks", "devour get react components", "devour get react state"},
|
||||
},
|
||||
{
|
||||
langs: []string{"vue"},
|
||||
url: "https://vuejs.org/guide/{topic}.html",
|
||||
examples: []string{"devour get vue components", "devour get vue reactivity"},
|
||||
},
|
||||
{
|
||||
langs: []string{"nuxt"},
|
||||
url: "https://nuxt.com/docs/guide/{topic}",
|
||||
examples: []string{"devour get nuxt routing", "devour get nuxt middleware"},
|
||||
},
|
||||
{
|
||||
langs: []string{"docker"},
|
||||
url: "https://docs.docker.com/{topic}",
|
||||
examples: []string{"devour get docker compose", "devour get docker build", "devour get docker networking"},
|
||||
},
|
||||
{
|
||||
langs: []string{"cloudflare", "cf"},
|
||||
url: "https://developers.cloudflare.com/{topic}",
|
||||
examples: []string{"devour get cloudflare workers", "devour get cf pages", "devour get cloudflare dns"},
|
||||
},
|
||||
{
|
||||
langs: []string{"astro"},
|
||||
url: "https://docs.astro.build/en/guides/{topic}",
|
||||
examples: []string{"devour get astro routing", "devour get astro components"},
|
||||
},
|
||||
rows := []languageInfo{
|
||||
{Canonical: "go", Aliases: []string{"go", "golang"}, Example: "devour get go http", Source: "pkg.go.dev"},
|
||||
{Canonical: "rust", Aliases: []string{"rust"}, Example: "devour get rust tokio", Source: "docs.rs"},
|
||||
{Canonical: "python", Aliases: []string{"python", "py"}, Example: "devour get python asyncio", Source: "docs.python.org"},
|
||||
{Canonical: "java", Aliases: []string{"java"}, Example: "devour get java string", Source: "docs.oracle.com"},
|
||||
{Canonical: "spring", Aliases: []string{"spring"}, Example: "devour get spring mcp", Source: "docs.spring.io"},
|
||||
{Canonical: "typescript", Aliases: []string{"typescript", "ts"}, Example: "devour get ts interfaces", Source: "typescriptlang.org"},
|
||||
{Canonical: "react", Aliases: []string{"react"}, Example: "devour get react hooks", Source: "react.dev"},
|
||||
{Canonical: "vue", Aliases: []string{"vue"}, Example: "devour get vue reactivity", Source: "vuejs.org"},
|
||||
{Canonical: "nuxt", Aliases: []string{"nuxt"}, Example: "devour get nuxt routing", Source: "nuxt.com"},
|
||||
{Canonical: "docker", Aliases: []string{"docker"}, Example: "devour get docker compose", Source: "docs.docker.com"},
|
||||
{Canonical: "cloudflare", Aliases: []string{"cloudflare", "cf"}, Example: "devour get cloudflare workers", Source: "developers.cloudflare.com"},
|
||||
{Canonical: "astro", Aliases: []string{"astro"}, Example: "devour get astro components", Source: "docs.astro.build"},
|
||||
{Canonical: "csharp", Aliases: []string{"csharp", "cs"}, Example: "devour get csharp regex", Source: "learn.microsoft.com"},
|
||||
{Canonical: "kotlin", Aliases: []string{"kotlin", "kt"}, Example: "devour get kotlin strings", Source: "kotlinlang.org"},
|
||||
{Canonical: "php", Aliases: []string{"php"}, Example: "devour get php pcre", Source: "php.net"},
|
||||
{Canonical: "ruby", Aliases: []string{"ruby", "rb"}, Example: "devour get ruby Regexp", Source: "ruby-doc.org"},
|
||||
{Canonical: "elixir", Aliases: []string{"elixir", "ex"}, Example: "devour get elixir String", Source: "hexdocs.pm"},
|
||||
{Canonical: "nextjs", Aliases: []string{"next", "nextjs"}, Example: "devour get nextjs routing", Source: "nextjs.org"},
|
||||
{Canonical: "svelte", Aliases: []string{"svelte"}, Example: "devour get svelte kit", Source: "svelte.dev"},
|
||||
{Canonical: "angular", Aliases: []string{"angular", "ng"}, Example: "devour get angular http", Source: "angular.dev"},
|
||||
{Canonical: "remix", Aliases: []string{"remix"}, Example: "devour get remix routes", Source: "v2.remix.run"},
|
||||
{Canonical: "solid", Aliases: []string{"solid", "solidjs"}, Example: "devour get solid router", Source: "github.com/solidjs/solid-docs"},
|
||||
{Canonical: "express", Aliases: []string{"express", "expressjs"}, Example: "devour get express middleware", Source: "expressjs.com"},
|
||||
}
|
||||
|
||||
for _, lang := range languages {
|
||||
fmt.Printf("🔷 %s\n", strings.Join(lang.langs, ", "))
|
||||
fmt.Printf(" URL: %s\n", lang.url)
|
||||
fmt.Printf(" Examples:\n")
|
||||
for _, example := range lang.examples {
|
||||
fmt.Printf(" • %s\n", example)
|
||||
switch strings.ToLower(strings.TrimSpace(languagesFormat)) {
|
||||
case "json":
|
||||
out := struct {
|
||||
Count int `json:"count"`
|
||||
Languages []languageInfo `json:"languages"`
|
||||
}{
|
||||
Count: len(rows),
|
||||
Languages: rows,
|
||||
}
|
||||
fmt.Println()
|
||||
enc := json.NewEncoder(cmd.OutOrStdout())
|
||||
enc.SetIndent("", " ")
|
||||
return enc.Encode(out)
|
||||
case "text", "":
|
||||
printLanguagesText(cmd.OutOrStdout(), rows)
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("unsupported format: %s", languagesFormat)
|
||||
}
|
||||
|
||||
fmt.Println("💡 Pro Tips:")
|
||||
fmt.Println(" • Use 'devour get <language> help' for language-specific help")
|
||||
fmt.Println(" • Add --format markdown for enhanced documentation")
|
||||
fmt.Println(" • Most languages support common aliases (e.g., py → python)")
|
||||
fmt.Println()
|
||||
fmt.Println("🚀 Quick Start:")
|
||||
fmt.Println(" devour get go http --format markdown")
|
||||
fmt.Println(" devour get python asyncio")
|
||||
fmt.Println(" devour get react hooks")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func printLanguagesText(out io.Writer, rows []languageInfo) {
|
||||
_, _ = fmt.Fprintln(out, "Devour Supported Languages")
|
||||
_, _ = fmt.Fprintln(out, "============================================")
|
||||
_, _ = fmt.Fprintln(out)
|
||||
for _, row := range rows {
|
||||
_, _ = fmt.Fprintf(out, "- %s (%s)\n", row.Canonical, strings.Join(row.Aliases, ", "))
|
||||
_, _ = fmt.Fprintf(out, " source: %s\n", row.Source)
|
||||
_, _ = fmt.Fprintf(out, " example: %s\n\n", row.Example)
|
||||
}
|
||||
_, _ = fmt.Fprintln(out, "Tip: use 'devour get <language> <keyword> --format markdown' for readable output.")
|
||||
}
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestLanguagesJSONFormat(t *testing.T) {
|
||||
prev := languagesFormat
|
||||
defer func() { languagesFormat = prev }()
|
||||
languagesFormat = "json"
|
||||
|
||||
var buf bytes.Buffer
|
||||
languagesCmd.SetOut(&buf)
|
||||
|
||||
if err := runLanguages(languagesCmd, nil); err != nil {
|
||||
t.Fatalf("runLanguages returned error: %v", err)
|
||||
}
|
||||
|
||||
var payload struct {
|
||||
Count int `json:"count"`
|
||||
Languages []struct {
|
||||
Canonical string `json:"canonical"`
|
||||
Aliases []string `json:"aliases"`
|
||||
} `json:"languages"`
|
||||
}
|
||||
if err := json.Unmarshal(buf.Bytes(), &payload); err != nil {
|
||||
t.Fatalf("invalid json output: %v", err)
|
||||
}
|
||||
if payload.Count == 0 || len(payload.Languages) == 0 {
|
||||
t.Fatalf("expected non-empty languages payload, got %+v", payload)
|
||||
}
|
||||
|
||||
foundNext := false
|
||||
for _, l := range payload.Languages {
|
||||
if l.Canonical == "nextjs" {
|
||||
foundNext = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !foundNext {
|
||||
t.Fatalf("expected nextjs in JSON payload, got %+v", payload.Languages)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLanguagesTextFormat(t *testing.T) {
|
||||
prev := languagesFormat
|
||||
defer func() { languagesFormat = prev }()
|
||||
languagesFormat = "text"
|
||||
|
||||
var buf bytes.Buffer
|
||||
languagesCmd.SetOut(&buf)
|
||||
|
||||
if err := runLanguages(languagesCmd, nil); err != nil {
|
||||
t.Fatalf("runLanguages returned error: %v", err)
|
||||
}
|
||||
out := buf.String()
|
||||
if !strings.Contains(out, "Devour Supported Languages") {
|
||||
t.Fatalf("unexpected text output: %q", out)
|
||||
}
|
||||
}
|
||||
+78
-26
@@ -1,25 +1,32 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/yourorg/devour/internal/scraper"
|
||||
"github.com/yourorg/devour/internal/search"
|
||||
"github.com/yourorg/devour/internal/storage"
|
||||
)
|
||||
|
||||
var pushCmd = &cobra.Command{
|
||||
Use: "push <path>",
|
||||
Short: "Push documents to remote MCP server",
|
||||
Long: `Push local documents to a remote Devour MCP server.
|
||||
Short: "Import local documents into Devour storage/index",
|
||||
Long: `Push local documents into your Devour local workspace.
|
||||
|
||||
Useful for:
|
||||
- Syncing local documentation to a shared server
|
||||
- Backing up indexed content
|
||||
- Contributing to a team knowledge base
|
||||
Current stable behavior:
|
||||
- local ingest into docs storage
|
||||
- local reindex for query/ask/status
|
||||
|
||||
Remote push is experimental and not enabled by default.
|
||||
|
||||
Examples:
|
||||
devour push ./docs
|
||||
devour push ./docs --server http://devour.company.com
|
||||
devour push ./docs --server http://localhost:8080 --project my-project`,
|
||||
devour push ./docs --project my-project`,
|
||||
Args: cobra.ExactArgs(1),
|
||||
RunE: runPush,
|
||||
}
|
||||
@@ -30,33 +37,78 @@ var (
|
||||
)
|
||||
|
||||
func init() {
|
||||
pushCmd.Flags().StringVar(&pushServer, "server", "", "remote Devour server URL")
|
||||
pushCmd.Flags().StringVarP(&pushProject, "project", "p", "", "project name on remote server")
|
||||
pushCmd.Flags().StringVar(&pushServer, "server", "", "remote Devour server URL (experimental)")
|
||||
pushCmd.Flags().StringVarP(&pushProject, "project", "p", "", "project name label")
|
||||
}
|
||||
|
||||
func runPush(cmd *cobra.Command, args []string) error {
|
||||
path := args[0]
|
||||
|
||||
if pushServer == "" {
|
||||
// Try to get from config
|
||||
pushServer = "http://localhost:8080"
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
return fmt.Errorf("path does not exist: %s", path)
|
||||
}
|
||||
|
||||
fmt.Printf("📤 Pushing to: %s\n", pushServer)
|
||||
fmt.Printf(" Path: %s\n", path)
|
||||
if pushProject != "" {
|
||||
fmt.Printf(" Project: %s\n", pushProject)
|
||||
cfg, err := loadAppConfig()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO: Implement actual push logic
|
||||
// 1. Scan path for documents
|
||||
// 2. Connect to remote server
|
||||
// 3. Upload documents
|
||||
// 4. Wait for indexing confirmation
|
||||
server := strings.TrimSpace(pushServer)
|
||||
if server != "" && !isLocalServer(server) {
|
||||
return fmt.Errorf("remote push is experimental and not enabled in this build; use local push without --server")
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
fmt.Println("⚠️ Push functionality not yet implemented")
|
||||
fmt.Println(" Remote server support coming soon")
|
||||
projectName := strings.TrimSpace(pushProject)
|
||||
if projectName == "" {
|
||||
projectName = "local-push"
|
||||
}
|
||||
|
||||
fmt.Printf("📤 Ingesting local docs from: %s\n", path)
|
||||
fmt.Printf(" Project: %s\n", projectName)
|
||||
fmt.Printf(" Target docs dir: %s\n", cfg.Storage.DocsDir)
|
||||
|
||||
s := scraper.NewScraper(scraper.SourceTypeLocal, toScraperConfig(cfg, 0))
|
||||
if s == nil {
|
||||
return fmt.Errorf("local scraper not available")
|
||||
}
|
||||
|
||||
docs, err := s.Scrape(context.Background(), &scraper.Source{
|
||||
Name: projectName,
|
||||
Type: scraper.SourceTypeLocal,
|
||||
Path: path,
|
||||
Include: []string{`.*`},
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("local ingest failed: %w", err)
|
||||
}
|
||||
|
||||
saved, err := storage.SaveDocuments(docs, storage.SaveOptions{
|
||||
Format: "json",
|
||||
OutputDir: cfg.Storage.DocsDir,
|
||||
AllowEmpty: false,
|
||||
PrintWriter: nil,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("save docs failed: %w", err)
|
||||
}
|
||||
|
||||
engine := search.NewEngine(cfg)
|
||||
stats, err := engine.Rebuild(context.Background())
|
||||
if err != nil {
|
||||
return fmt.Errorf("reindex failed: %w", err)
|
||||
}
|
||||
|
||||
fmt.Println("\n✓ Push complete")
|
||||
fmt.Printf(" Documents imported: %d\n", saved.Count)
|
||||
fmt.Printf(" Index docs: %d\n", stats.Documents)
|
||||
fmt.Printf(" Index path: %s\n", stats.IndexPath)
|
||||
return nil
|
||||
}
|
||||
|
||||
// isLocalServer reports whether raw names a server on this machine.
//
// A host counts as local when it is empty (an empty string or a bare
// absolute path), "localhost", or one of the loopback literals 127.0.0.1
// and ::1. Input that cannot be parsed is reported as not local.
//
// Values without a "scheme://" prefix, such as "localhost:8080" or
// "devour.company.com", are parsed as a bare host[:port]. Without that
// step url.Parse files the host under the scheme or the path, Hostname()
// comes back empty, and every scheme-less remote address looks local.
func isLocalServer(raw string) bool {
	candidate := strings.TrimSpace(raw)
	if candidate != "" && !strings.Contains(candidate, "://") && !strings.HasPrefix(candidate, "/") {
		// "//host:port" is a network-path reference, so the authority
		// lands in u.Host instead of u.Scheme or u.Path.
		candidate = "//" + candidate
	}

	u, err := url.Parse(candidate)
	if err != nil {
		return false
	}

	// Hostname strips the port and the brackets around IPv6 literals.
	switch strings.ToLower(u.Hostname()) {
	case "", "localhost", "127.0.0.1", "::1":
		return true
	default:
		return false
	}
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
@@ -218,6 +219,7 @@ func runQualityScan(cmd *cobra.Command, args []string) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("scan failed: %w", err)
|
||||
}
|
||||
result.Findings = quality.AttachDocsEvidence(lang, result.Findings)
|
||||
|
||||
return outputScanResult(result, qualityFormat)
|
||||
}
|
||||
@@ -256,9 +258,11 @@ func runQualityStatus(cmd *cobra.Command, args []string) error {
|
||||
return json.NewEncoder(os.Stdout).Encode(scorecard)
|
||||
case "strict":
|
||||
fmt.Println(scorer.FormatStrictScorecard(findings, lastScan))
|
||||
printQualityEvidenceSummary(findings)
|
||||
return nil
|
||||
default:
|
||||
fmt.Println(scorer.FormatScorecard(scorecard))
|
||||
printQualityEvidenceSummary(findings)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -318,6 +322,17 @@ func runQualityNext(cmd *cobra.Command, args []string) error {
|
||||
fmt.Printf("Score: %d\n", next.Score)
|
||||
fmt.Printf("ID: %s\n", next.ID)
|
||||
fmt.Printf("\nDescription:\n%s\n", next.Description)
|
||||
if next.Metadata != nil {
|
||||
if urls := strings.TrimSpace(next.Metadata["docs_evidence_urls"]); urls != "" {
|
||||
fmt.Printf("\nEvidence Docs:\n%s\n", urls)
|
||||
}
|
||||
if rationale := strings.TrimSpace(next.Metadata["docs_evidence_rationale"]); rationale != "" {
|
||||
fmt.Printf("\nRationale:\n%s\n", rationale)
|
||||
}
|
||||
if confidence := strings.TrimSpace(next.Metadata["docs_evidence_confidence"]); confidence != "" {
|
||||
fmt.Printf("Evidence confidence: %s\n", confidence)
|
||||
}
|
||||
}
|
||||
|
||||
if explain {
|
||||
fmt.Printf("\nExplanation:\n")
|
||||
@@ -693,3 +708,27 @@ func importReviewResponses(dataDir string, filename string) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func printQualityEvidenceSummary(findings []quality.Finding) {
|
||||
totalWithEvidence := 0
|
||||
for _, f := range findings {
|
||||
if f.Metadata != nil && strings.TrimSpace(f.Metadata["docs_evidence_urls"]) != "" {
|
||||
totalWithEvidence++
|
||||
}
|
||||
}
|
||||
if totalWithEvidence == 0 {
|
||||
return
|
||||
}
|
||||
fmt.Printf("\nEvidence-linked findings: %d/%d\n", totalWithEvidence, len(findings))
|
||||
for _, f := range findings {
|
||||
if f.Metadata == nil {
|
||||
continue
|
||||
}
|
||||
urls := strings.TrimSpace(f.Metadata["docs_evidence_urls"])
|
||||
if urls == "" {
|
||||
continue
|
||||
}
|
||||
fmt.Printf(" • %s:%d - %s\n %s\n", filepath.Base(f.File), f.Line, f.Title, urls)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
+100
-18
@@ -1,9 +1,14 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
appconfig "github.com/yourorg/devour/internal/config"
|
||||
"github.com/yourorg/devour/internal/search"
|
||||
)
|
||||
|
||||
var queryCmd = &cobra.Command{
|
||||
@@ -29,32 +34,109 @@ var (
|
||||
)
|
||||
|
||||
func init() {
|
||||
queryCmd.Flags().IntVarP(&queryLimit, "limit", "l", 5, "maximum number of results")
|
||||
queryCmd.Flags().IntVarP(&queryLimit, "limit", "n", 5, "maximum number of results")
|
||||
queryCmd.Flags().StringVarP(&queryFormat, "format", "f", "text", "output format (text, json, markdown)")
|
||||
queryCmd.Flags().Float64Var(&queryThreshold, "threshold", 0.7, "similarity threshold (0-1)")
|
||||
queryCmd.Flags().Float64Var(&queryThreshold, "threshold", 0, "minimum lexical score threshold")
|
||||
}
|
||||
|
||||
func runQuery(cmd *cobra.Command, args []string) error {
|
||||
query := args[0]
|
||||
if len(args) > 1 {
|
||||
query = fmt.Sprintf("%s", args)
|
||||
query := strings.TrimSpace(strings.Join(args, " "))
|
||||
if query == "" {
|
||||
return fmt.Errorf("query cannot be empty")
|
||||
}
|
||||
|
||||
fmt.Printf("Searching: %q\n", query)
|
||||
fmt.Printf(" Limit: %d\n", queryLimit)
|
||||
fmt.Printf(" Threshold: %.2f\n", queryThreshold)
|
||||
fmt.Println()
|
||||
cfg, err := loadAppConfig()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO: Implement actual query logic
|
||||
// 1. Generate embedding for query
|
||||
// 2. Search vector database
|
||||
// 3. Format and return results
|
||||
engine := search.NewEngine(cfg)
|
||||
results, stats, err := engine.Search(context.Background(), query, search.SearchOptions{
|
||||
Limit: queryLimit,
|
||||
Threshold: queryThreshold,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("query failed: %w", err)
|
||||
}
|
||||
|
||||
// Placeholder results
|
||||
fmt.Println("Results:")
|
||||
fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
fmt.Println("⚠️ Query functionality not yet implemented")
|
||||
fmt.Println(" Index some documents first with 'devour scrape'")
|
||||
switch strings.ToLower(queryFormat) {
|
||||
case "json":
|
||||
resp := map[string]any{
|
||||
"query": query,
|
||||
"limit": queryLimit,
|
||||
"threshold": queryThreshold,
|
||||
"count": len(results),
|
||||
"results": results,
|
||||
"indexed_at": stats.LastIndexedAt,
|
||||
"documents": stats.Documents,
|
||||
}
|
||||
enc := json.NewEncoder(cmd.OutOrStdout())
|
||||
enc.SetIndent("", " ")
|
||||
return enc.Encode(resp)
|
||||
case "markdown":
|
||||
return printQueryMarkdown(cmd, query, cfg, results, stats)
|
||||
case "text":
|
||||
return printQueryText(cmd, query, cfg, results, stats)
|
||||
default:
|
||||
return fmt.Errorf("unsupported format: %s (supported: text, json, markdown)", queryFormat)
|
||||
}
|
||||
}
|
||||
|
||||
func printQueryText(cmd *cobra.Command, query string, cfg *appconfig.Config, results []search.Result, stats *search.IndexStats) error {
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "Searching: %q\n", query)
|
||||
fmt.Fprintf(cmd.OutOrStdout(), " Limit: %d\n", queryLimit)
|
||||
fmt.Fprintf(cmd.OutOrStdout(), " Threshold: %.2f\n", queryThreshold)
|
||||
fmt.Fprintf(cmd.OutOrStdout(), " Indexed docs: %d\n", stats.Documents)
|
||||
fmt.Fprintf(cmd.OutOrStdout(), " Docs dir: %s\n\n", cfg.Storage.DocsDir)
|
||||
|
||||
if len(results) == 0 {
|
||||
fmt.Fprintln(cmd.OutOrStdout(), "No results found.")
|
||||
return nil
|
||||
}
|
||||
|
||||
fmt.Fprintln(cmd.OutOrStdout(), "Results:")
|
||||
fmt.Fprintln(cmd.OutOrStdout(), "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
for i, r := range results {
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "%d. %s\n", i+1, r.Title)
|
||||
fmt.Fprintf(cmd.OutOrStdout(), " Score: %.3f | Type: %s | Source: %s\n", r.Score, r.Type, defaultSource(r.Source))
|
||||
if r.URL != "" {
|
||||
fmt.Fprintf(cmd.OutOrStdout(), " URL: %s\n", r.URL)
|
||||
}
|
||||
fmt.Fprintf(cmd.OutOrStdout(), " Snippet: %s\n\n", r.Snippet)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func printQueryMarkdown(cmd *cobra.Command, query string, cfg *appconfig.Config, results []search.Result, stats *search.IndexStats) error {
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "# Query Results\n\n")
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "- Query: `%s`\n", query)
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "- Limit: `%d`\n", queryLimit)
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "- Threshold: `%.2f`\n", queryThreshold)
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "- Indexed docs: `%d`\n", stats.Documents)
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "- Docs dir: `%s`\n\n", cfg.Storage.DocsDir)
|
||||
|
||||
if len(results) == 0 {
|
||||
fmt.Fprintln(cmd.OutOrStdout(), "_No results found._")
|
||||
return nil
|
||||
}
|
||||
|
||||
for i, r := range results {
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "## %d. %s\n\n", i+1, r.Title)
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "- Score: `%.3f`\n", r.Score)
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "- Type: `%s`\n", r.Type)
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "- Source: `%s`\n", defaultSource(r.Source))
|
||||
if r.URL != "" {
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "- URL: %s\n", r.URL)
|
||||
}
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "\n%s\n\n", r.Snippet)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// defaultSource returns source with surrounding whitespace removed, or the
// placeholder "unknown" when nothing is left after trimming.
func defaultSource(source string) string {
	if trimmed := strings.TrimSpace(source); trimmed != "" {
		return trimmed
	}
	return "unknown"
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/yourorg/devour/internal/scraper"
|
||||
_ "github.com/yourorg/devour/internal/scraper/external"
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -90,6 +91,7 @@ func main() {
|
||||
scraper.SourceTypeGitHub,
|
||||
scraper.SourceTypeOpenAPI,
|
||||
scraper.SourceTypeLocal,
|
||||
scraper.SourceTypeLocalSearch,
|
||||
scraper.SourceTypeGoDocs,
|
||||
scraper.SourceTypeRustDocs,
|
||||
scraper.SourceTypePythonDocs,
|
||||
|
||||
+6
-1
@@ -6,6 +6,7 @@ import (
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/spf13/viper"
|
||||
_ "github.com/yourorg/devour/internal/scraper/external"
|
||||
"github.com/yourorg/devour/internal/ui"
|
||||
)
|
||||
|
||||
@@ -33,7 +34,8 @@ context to AI models for generating accurate, fully working code.
|
||||
Runs in two modes:
|
||||
- Local mode: OpenCode skill running entirely on your machine
|
||||
- Remote mode: MCP server for multi-user/team access`,
|
||||
Version: "1.0.0",
|
||||
Version: "1.0.0",
|
||||
SilenceUsage: true,
|
||||
}
|
||||
|
||||
func Execute() {
|
||||
@@ -53,6 +55,7 @@ func init() {
|
||||
rootCmd.AddCommand(initCmd)
|
||||
rootCmd.AddCommand(scrapeCmd)
|
||||
rootCmd.AddCommand(getCmd)
|
||||
rootCmd.AddCommand(askCmd)
|
||||
rootCmd.AddCommand(languagesCmd)
|
||||
rootCmd.AddCommand(demoCmd)
|
||||
rootCmd.AddCommand(serveCmd)
|
||||
@@ -62,6 +65,8 @@ func init() {
|
||||
rootCmd.AddCommand(pushCmd)
|
||||
rootCmd.AddCommand(logoCmd)
|
||||
rootCmd.AddCommand(scorecardCmd)
|
||||
rootCmd.AddCommand(autoCmd)
|
||||
rootCmd.AddCommand(verifyCmd)
|
||||
}
|
||||
|
||||
// logoCmd displays the Devour character
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
package cmd
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestRootCommandsAreUnique(t *testing.T) {
|
||||
seen := map[string]bool{}
|
||||
for _, c := range rootCmd.Commands() {
|
||||
name := c.Name()
|
||||
if seen[name] {
|
||||
t.Fatalf("duplicate root command registered: %s", name)
|
||||
}
|
||||
seen[name] = true
|
||||
}
|
||||
}
|
||||
|
||||
func TestQueryLimitShorthandIsN(t *testing.T) {
|
||||
flag := queryCmd.Flags().Lookup("limit")
|
||||
if flag == nil {
|
||||
t.Fatal("query --limit flag not found")
|
||||
}
|
||||
if flag.Shorthand != "n" {
|
||||
t.Fatalf("expected query --limit shorthand to be n, got %q", flag.Shorthand)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRootExecuteQueryNoPanic(t *testing.T) {
|
||||
rootCmd.SetArgs([]string{"query", "http client", "--limit", "1"})
|
||||
if _, err := rootCmd.ExecuteC(); err != nil {
|
||||
t.Fatalf("query execution should not panic; got error: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,81 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
appconfig "github.com/yourorg/devour/internal/config"
|
||||
"github.com/yourorg/devour/internal/scraper"
|
||||
)
|
||||
|
||||
func loadAppConfig() (*appconfig.Config, error) {
|
||||
cfg, err := appconfig.Load(cfgFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := cfg.EnsureStorageDirs(); err != nil {
|
||||
return nil, fmt.Errorf("ensure storage dirs: %w", err)
|
||||
}
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
func toScraperConfig(c *appconfig.Config, concurrencyOverride int) *scraper.Config {
|
||||
sc := &scraper.Config{
|
||||
UserAgent: c.Scraper.UserAgent,
|
||||
Timeout: c.Scraper.Timeout,
|
||||
RetryCount: c.Scraper.RetryCount,
|
||||
RetryDelay: c.Scraper.RetryDelay,
|
||||
Concurrency: c.Scraper.Concurrency,
|
||||
RateLimit: c.Scraper.RateLimit,
|
||||
MaxDepth: c.Scraper.MaxDepth,
|
||||
CacheDir: c.Scraper.CacheDir,
|
||||
}
|
||||
if concurrencyOverride > 0 {
|
||||
sc.Concurrency = concurrencyOverride
|
||||
}
|
||||
if sc.Timeout <= 0 {
|
||||
sc.Timeout = 30 * time.Second
|
||||
}
|
||||
if sc.RetryCount <= 0 {
|
||||
sc.RetryCount = 3
|
||||
}
|
||||
if sc.RetryDelay <= 0 {
|
||||
sc.RetryDelay = 1 * time.Second
|
||||
}
|
||||
if sc.Concurrency <= 0 {
|
||||
sc.Concurrency = 10
|
||||
}
|
||||
if sc.MaxDepth <= 0 {
|
||||
sc.MaxDepth = 2
|
||||
}
|
||||
return sc
|
||||
}
|
||||
|
||||
func sourceFromConfig(s appconfig.SourceConfig) *scraper.Source {
|
||||
return &scraper.Source{
|
||||
Name: strings.TrimSpace(s.Name),
|
||||
Type: scraper.SourceType(strings.TrimSpace(s.Type)),
|
||||
URL: strings.TrimSpace(s.URL),
|
||||
Query: strings.TrimSpace(s.Query),
|
||||
ResultLimit: s.ResultLimit,
|
||||
Domains: append([]string(nil), s.Domains...),
|
||||
Repo: strings.TrimSpace(s.Repo),
|
||||
Branch: strings.TrimSpace(s.Branch),
|
||||
Path: strings.TrimSpace(s.Path),
|
||||
Include: append([]string(nil), s.Include...),
|
||||
Exclude: append([]string(nil), s.Exclude...),
|
||||
Schedule: strings.TrimSpace(s.Schedule),
|
||||
}
|
||||
}
|
||||
|
||||
func resolveOutputDir(c *appconfig.Config, override string) string {
|
||||
if strings.TrimSpace(override) != "" {
|
||||
return override
|
||||
}
|
||||
if strings.TrimSpace(c.Storage.DocsDir) != "" {
|
||||
return c.Storage.DocsDir
|
||||
}
|
||||
return filepath.Join("devour_data", "docs")
|
||||
}
|
||||
@@ -37,7 +37,6 @@ Examples:
|
||||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(scorecardCmd)
|
||||
scorecardCmd.Flags().BoolVar(&scorecardCompact, "compact", false, "Generate compact banner only")
|
||||
scorecardCmd.Flags().BoolVar(&scorecardDetailed, "detailed", false, "Generate detailed banner only")
|
||||
scorecardCmd.Flags().StringVarP(&scorecardOutput, "output", "o", "lighthouse_scorecard", "Output filename prefix")
|
||||
|
||||
+302
-98
@@ -2,17 +2,23 @@ package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/yourorg/devour/internal/markdown"
|
||||
appconfig "github.com/yourorg/devour/internal/config"
|
||||
"github.com/yourorg/devour/internal/projectstate"
|
||||
"github.com/yourorg/devour/internal/scraper"
|
||||
"github.com/yourorg/devour/internal/search"
|
||||
"github.com/yourorg/devour/internal/storage"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
var scrapeCmd = &cobra.Command{
|
||||
@@ -34,144 +40,283 @@ Supported source types:
|
||||
- dockerdocs: Docker (docs.docker.com)
|
||||
- cloudflaredocs: Cloudflare (developers.cloudflare.com)
|
||||
- astrodocs: Astro (docs.astro.build)
|
||||
- localsearch: Self-hosted search API returning JSON results
|
||||
- url: Generic web pages
|
||||
- github: GitHub repositories
|
||||
- openapi: OpenAPI/Swagger specs
|
||||
- local: Local files/directories
|
||||
|
||||
Examples:
|
||||
devour scrape https://pkg.go.dev/net/http --type godocs
|
||||
devour scrape https://react.dev/reference/react --type reactdocs
|
||||
devour scrape https://developers.cloudflare.com/ --type cloudflaredocs
|
||||
devour scrape http://127.0.0.1:8080/search --type localsearch --search-query "golang http client"
|
||||
devour scrape --sources sources.yaml`,
|
||||
Args: cobra.MaximumNArgs(1),
|
||||
RunE: runScrape,
|
||||
}
|
||||
|
||||
var (
|
||||
scrapeFormat string
|
||||
scrapeSources string
|
||||
scrapeOutput string
|
||||
scrapeConcurrency int
|
||||
scrapeType string
|
||||
scrapeFormat string
|
||||
scrapeSources string
|
||||
scrapeOutput string
|
||||
scrapeConcurrency int
|
||||
scrapeType string
|
||||
scrapeSearchQuery string
|
||||
scrapeSearchLimit int
|
||||
scrapeSearchDomains []string
|
||||
scrapeInclude []string
|
||||
scrapeExclude []string
|
||||
scrapeAllowEmpty bool
|
||||
)
|
||||
|
||||
func init() {
|
||||
scrapeCmd.Flags().StringVarP(&scrapeFormat, "format", "f", "json", "output format (json, markdown)")
|
||||
scrapeCmd.Flags().StringVarP(&scrapeSources, "sources", "s", "", "YAML file with source definitions")
|
||||
scrapeCmd.Flags().StringVarP(&scrapeOutput, "output", "o", "", "output directory (default: devour_data/docs)")
|
||||
scrapeCmd.Flags().StringVarP(&scrapeOutput, "output", "o", "", "output directory (default: configured docs dir)")
|
||||
scrapeCmd.Flags().IntVar(&scrapeConcurrency, "concurrency", 10, "parallel scraping workers")
|
||||
scrapeCmd.Flags().StringVarP(&scrapeType, "type", "t", "", "source type (auto-detected if not specified)")
|
||||
scrapeCmd.Flags().StringVar(&scrapeSearchQuery, "search-query", "", "search query for --type localsearch")
|
||||
scrapeCmd.Flags().IntVar(&scrapeSearchLimit, "search-limit", 8, "max result URLs to scrape for --type localsearch")
|
||||
scrapeCmd.Flags().StringSliceVar(&scrapeSearchDomains, "search-domain", nil, "restrict localsearch results to these domains (repeatable)")
|
||||
scrapeCmd.Flags().StringSliceVar(&scrapeInclude, "include", nil, "include URL/file regex patterns (repeatable)")
|
||||
scrapeCmd.Flags().StringSliceVar(&scrapeExclude, "exclude", nil, "exclude URL/file regex patterns (repeatable)")
|
||||
scrapeCmd.Flags().BoolVar(&scrapeAllowEmpty, "allow-empty", false, "allow success when no documents were extracted")
|
||||
}
|
||||
|
||||
func runScrape(cmd *cobra.Command, args []string) error {
|
||||
cfg, err := loadAppConfig()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if scrapeSources != "" {
|
||||
return scrapeFromConfig(scrapeSources)
|
||||
return scrapeFromConfig(cmd, cfg, scrapeSources)
|
||||
}
|
||||
|
||||
if len(args) == 0 {
|
||||
return fmt.Errorf("source argument required when not using --sources flag")
|
||||
}
|
||||
|
||||
sourceURL := args[0]
|
||||
|
||||
config := &scraper.Config{
|
||||
UserAgent: "Devour/1.0 (Documentation Scraper)",
|
||||
Timeout: 30 * time.Second,
|
||||
RetryCount: 3,
|
||||
RetryDelay: 1 * time.Second,
|
||||
Concurrency: scrapeConcurrency,
|
||||
}
|
||||
|
||||
sourceURL := strings.TrimSpace(args[0])
|
||||
sourceType := scraper.SourceType(scrapeType)
|
||||
if sourceType == "" {
|
||||
sourceType = detectSourceType(sourceURL)
|
||||
}
|
||||
|
||||
fmt.Printf("Scraping: %s\n", sourceURL)
|
||||
fmt.Printf(" Type: %s\n", sourceType)
|
||||
fmt.Printf(" Concurrency: %d\n", scrapeConcurrency)
|
||||
source := &scraper.Source{
|
||||
Name: extractName(sourceURL),
|
||||
Type: sourceType,
|
||||
URL: sourceURL,
|
||||
Query: strings.TrimSpace(scrapeSearchQuery),
|
||||
ResultLimit: scrapeSearchLimit,
|
||||
Domains: append([]string(nil), scrapeSearchDomains...),
|
||||
Include: append([]string(nil), scrapeInclude...),
|
||||
Exclude: append([]string(nil), scrapeExclude...),
|
||||
}
|
||||
if sourceType == scraper.SourceTypeLocal {
|
||||
source.Path = sourceURL
|
||||
}
|
||||
applySourceProfile(source)
|
||||
|
||||
outputDir := resolveOutputDir(cfg, scrapeOutput)
|
||||
count, err := scrapeOne(cmd, cfg, source, outputDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if cfg.Indexing.Enabled {
|
||||
engine := search.NewEngine(cfg)
|
||||
if _, err := engine.Rebuild(context.Background()); err != nil {
|
||||
return fmt.Errorf("reindex after scrape: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("\n✓ Scraping complete!\n")
|
||||
fmt.Printf(" Output: %s\n", outputDir)
|
||||
fmt.Printf(" Documents: %d\n", count)
|
||||
fmt.Println(" Run 'devour status' to inspect local index health")
|
||||
return nil
|
||||
}
|
||||
|
||||
func scrapeFromConfig(cmd *cobra.Command, cfg *appconfig.Config, configPath string) error {
|
||||
raw, err := os.ReadFile(configPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("read sources file: %w", err)
|
||||
}
|
||||
|
||||
var list []appconfig.SourceConfig
|
||||
if err := yaml.Unmarshal(raw, &list); err != nil || len(list) == 0 {
|
||||
var wrapped struct {
|
||||
Sources []appconfig.SourceConfig `yaml:"sources"`
|
||||
}
|
||||
if wrapErr := yaml.Unmarshal(raw, &wrapped); wrapErr != nil {
|
||||
return fmt.Errorf("parse sources file: %w", err)
|
||||
}
|
||||
list = wrapped.Sources
|
||||
}
|
||||
if len(list) == 0 {
|
||||
return fmt.Errorf("sources file contains no sources")
|
||||
}
|
||||
|
||||
sort.Slice(list, func(i, j int) bool {
|
||||
return list[i].Name < list[j].Name
|
||||
})
|
||||
|
||||
outputDir := resolveOutputDir(cfg, scrapeOutput)
|
||||
success := 0
|
||||
failures := 0
|
||||
totalDocs := 0
|
||||
for _, srcCfg := range list {
|
||||
source := sourceFromConfig(srcCfg)
|
||||
if source.Type == "" {
|
||||
if source.URL != "" {
|
||||
source.Type = detectSourceType(source.URL)
|
||||
} else if source.Path != "" {
|
||||
source.Type = scraper.SourceTypeLocal
|
||||
}
|
||||
}
|
||||
if source.Name == "" {
|
||||
source.Name = extractName(source.URL)
|
||||
if source.Name == "unknown" && source.Path != "" {
|
||||
source.Name = filepath.Base(source.Path)
|
||||
}
|
||||
}
|
||||
applySourceProfile(source)
|
||||
|
||||
fmt.Printf("\n=== Source: %s (%s) ===\n", source.Name, source.Type)
|
||||
count, srcErr := scrapeOne(cmd, cfg, source, outputDir)
|
||||
if srcErr != nil {
|
||||
failures++
|
||||
fmt.Printf("✗ %s failed: %v\n", source.Name, srcErr)
|
||||
continue
|
||||
}
|
||||
totalDocs += count
|
||||
success++
|
||||
}
|
||||
|
||||
if cfg.Indexing.Enabled {
|
||||
engine := search.NewEngine(cfg)
|
||||
if _, err := engine.Rebuild(context.Background()); err != nil {
|
||||
return fmt.Errorf("reindex after scrape sources: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("\nSummary: %d succeeded, %d failed, %d docs written\n", success, failures, totalDocs)
|
||||
if failures > 0 {
|
||||
return fmt.Errorf("one or more sources failed")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func scrapeOne(cmd *cobra.Command, cfg *appconfig.Config, source *scraper.Source, outputDir string) (int, error) {
|
||||
if source == nil {
|
||||
return 0, fmt.Errorf("source is required")
|
||||
}
|
||||
if source.Type == "" {
|
||||
return 0, fmt.Errorf("source type is required")
|
||||
}
|
||||
|
||||
if source.Type == scraper.SourceTypeLocalSearch && strings.TrimSpace(source.Query) == "" {
|
||||
return 0, fmt.Errorf("search query is required for localsearch sources")
|
||||
}
|
||||
|
||||
scraperConfig := toScraperConfig(cfg, scrapeConcurrency)
|
||||
s := scraper.NewScraper(source.Type, scraperConfig)
|
||||
if s == nil {
|
||||
return 0, fmt.Errorf("unsupported source type: %s", source.Type)
|
||||
}
|
||||
|
||||
fmt.Printf("Scraping: %s\n", chooseSourceLabel(source))
|
||||
fmt.Printf(" Type: %s\n", source.Type)
|
||||
fmt.Printf(" Concurrency: %d\n", scraperConfig.Concurrency)
|
||||
if source.Type == scraper.SourceTypeLocalSearch {
|
||||
fmt.Printf(" Search query: %s\n", source.Query)
|
||||
fmt.Printf(" Search limit: %d\n", source.ResultLimit)
|
||||
if len(source.Domains) > 0 {
|
||||
fmt.Printf(" Search domains: %s\n", strings.Join(source.Domains, ", "))
|
||||
}
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
s := scraper.NewScraper(sourceType, config)
|
||||
if s == nil {
|
||||
return fmt.Errorf("unsupported source type: %s", sourceType)
|
||||
}
|
||||
|
||||
source := &scraper.Source{
|
||||
Name: extractName(sourceURL),
|
||||
Type: sourceType,
|
||||
URL: sourceURL,
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), scraperConfig.Timeout*2)
|
||||
defer cancel()
|
||||
|
||||
docs, err := s.Scrape(ctx, source)
|
||||
if err != nil {
|
||||
return fmt.Errorf("scraping failed: %w", err)
|
||||
return 0, fmt.Errorf("scraping failed: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("✓ Scraped %d documents\n\n", len(docs))
|
||||
|
||||
if scrapeOutput == "" {
|
||||
scrapeOutput = "devour_data/docs"
|
||||
save, err := storage.SaveDocuments(docs, storage.SaveOptions{
|
||||
Format: scrapeFormat,
|
||||
OutputDir: outputDir,
|
||||
AllowEmpty: scrapeAllowEmpty,
|
||||
PrintWriter: func(format string, args ...any) {
|
||||
_, _ = fmt.Printf(format, args...)
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(scrapeOutput, 0755); err != nil {
|
||||
return fmt.Errorf("failed to create output directory: %w", err)
|
||||
fmt.Printf("✓ Scraped %d documents\n", save.Count)
|
||||
|
||||
if err := updateSourceState(cfg, source, docs); err != nil {
|
||||
return save.Count, fmt.Errorf("update source state: %w", err)
|
||||
}
|
||||
|
||||
for i, doc := range docs {
|
||||
var filename string
|
||||
var content []byte
|
||||
|
||||
if scrapeFormat == "markdown" {
|
||||
filename = fmt.Sprintf("%s_%d.md", sanitizeFilename(doc.Title), i)
|
||||
|
||||
// Create enhanced markdown document
|
||||
markdownDoc := &markdown.Document{
|
||||
ID: doc.ID,
|
||||
Source: doc.Source,
|
||||
Type: string(doc.Type),
|
||||
Title: doc.Title,
|
||||
Content: doc.Content,
|
||||
URL: doc.URL,
|
||||
Metadata: doc.Metadata,
|
||||
Hash: doc.Hash,
|
||||
Timestamp: doc.Timestamp,
|
||||
}
|
||||
|
||||
formatter := markdown.NewFormatter()
|
||||
content = []byte(formatter.FormatWithTOC(markdownDoc))
|
||||
} else {
|
||||
filename = fmt.Sprintf("%s_%d.json", sanitizeFilename(doc.Title), i)
|
||||
content, err = json.MarshalIndent(doc, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal document: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
filePath := filepath.Join(scrapeOutput, filename)
|
||||
if err := os.WriteFile(filePath, content, 0644); err != nil {
|
||||
return fmt.Errorf("failed to write document: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf(" 📄 %s (%s)\n", filename, doc.Type)
|
||||
}
|
||||
|
||||
fmt.Printf("\n✓ Scraping complete!\n")
|
||||
fmt.Printf(" Output: %s\n", scrapeOutput)
|
||||
fmt.Println(" Run 'devour status' to see indexed documents")
|
||||
|
||||
return nil
|
||||
return save.Count, nil
|
||||
}
|
||||
|
||||
func scrapeFromConfig(configPath string) error {
|
||||
return fmt.Errorf("scraping from config file not yet implemented")
|
||||
func updateSourceState(cfg *appconfig.Config, source *scraper.Source, docs []*scraper.Document) error {
|
||||
state, err := projectstate.LoadSourceState(cfg.Storage.MetadataDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
key := source.Name
|
||||
if key == "" {
|
||||
key = chooseSourceLabel(source)
|
||||
}
|
||||
|
||||
h := sha256.New()
|
||||
for _, d := range docs {
|
||||
if d == nil {
|
||||
continue
|
||||
}
|
||||
fmt.Fprintf(h, "%s|%s|%s\n", d.ID, d.Hash, d.URL)
|
||||
}
|
||||
state.Sources[key] = &projectstate.SourceState{
|
||||
Name: source.Name,
|
||||
Type: string(source.Type),
|
||||
URL: source.URL,
|
||||
Hash: hex.EncodeToString(h.Sum(nil)),
|
||||
LastSync: time.Now(),
|
||||
DocCount: len(docs),
|
||||
}
|
||||
|
||||
return projectstate.SaveSourceState(cfg.Storage.MetadataDir, state)
|
||||
}
|
||||
|
||||
func chooseSourceLabel(source *scraper.Source) string {
|
||||
if strings.TrimSpace(source.URL) != "" {
|
||||
return source.URL
|
||||
}
|
||||
if strings.TrimSpace(source.Path) != "" {
|
||||
return source.Path
|
||||
}
|
||||
if strings.TrimSpace(source.Repo) != "" {
|
||||
return source.Repo
|
||||
}
|
||||
return source.Name
|
||||
}
|
||||
|
||||
func detectSourceType(sourceURL string) scraper.SourceType {
|
||||
u, err := url.Parse(sourceURL)
|
||||
if err != nil {
|
||||
if sourceURL != "" && !strings.HasPrefix(sourceURL, "http://") && !strings.HasPrefix(sourceURL, "https://") {
|
||||
return scraper.SourceTypeLocal
|
||||
}
|
||||
return scraper.SourceTypeWeb
|
||||
}
|
||||
|
||||
@@ -208,6 +353,11 @@ func detectSourceType(sourceURL string) scraper.SourceType {
|
||||
return scraper.SourceTypeAstroDocs
|
||||
case host == "github.com":
|
||||
return scraper.SourceTypeGitHub
|
||||
case strings.HasSuffix(path, ".json") || strings.HasSuffix(path, ".yaml") || strings.HasSuffix(path, ".yml"):
|
||||
if strings.Contains(strings.ToLower(path), "openapi") || strings.Contains(strings.ToLower(path), "swagger") {
|
||||
return scraper.SourceTypeOpenAPI
|
||||
}
|
||||
return scraper.SourceTypeWeb
|
||||
default:
|
||||
return scraper.SourceTypeWeb
|
||||
}
|
||||
@@ -216,27 +366,81 @@ func detectSourceType(sourceURL string) scraper.SourceType {
|
||||
func extractName(sourceURL string) string {
|
||||
u, err := url.Parse(sourceURL)
|
||||
if err != nil {
|
||||
if strings.TrimSpace(sourceURL) != "" {
|
||||
return filepath.Base(sourceURL)
|
||||
}
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
parts := strings.Split(strings.Trim(u.Path, "/"), "/")
|
||||
if len(parts) > 0 {
|
||||
if len(parts) > 0 && strings.TrimSpace(parts[len(parts)-1]) != "" {
|
||||
return parts[len(parts)-1]
|
||||
}
|
||||
|
||||
return u.Host
|
||||
if strings.TrimSpace(u.Host) != "" {
|
||||
return u.Host
|
||||
}
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
// sanitizeFilename converts an arbitrary name into a lowercase token that is
// safe to use as a file name component.
//
// Spaces, slashes, colons and dots are replaced with underscores, and the
// result is limited to at most 50 bytes. Truncation backs up to the nearest
// rune boundary so a multi-byte UTF-8 character is never cut in half.
func sanitizeFilename(name string) string {
	name = strings.ToLower(name)
	name = strings.NewReplacer(" ", "_", "/", "_", ":", "_", ".", "_").Replace(name)

	const maxLen = 50
	if len(name) > maxLen {
		cut := maxLen
		// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; step back
		// until the cut lands on the first byte of a rune.
		for cut > 0 && name[cut]&0xC0 == 0x80 {
			cut--
		}
		name = name[:cut]
	}

	return name
}
|
||||
func applySourceProfile(source *scraper.Source) {
|
||||
if source == nil {
|
||||
return
|
||||
}
|
||||
if source.Type != scraper.SourceTypeWeb && source.Type != scraper.SourceTypeLocalSearch {
|
||||
return
|
||||
}
|
||||
if strings.TrimSpace(source.URL) == "" {
|
||||
return
|
||||
}
|
||||
|
||||
return name
|
||||
u, err := url.Parse(source.URL)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
host := strings.ToLower(u.Host)
|
||||
if host == "" {
|
||||
return
|
||||
}
|
||||
|
||||
// Preserve explicit user-provided patterns.
|
||||
if len(source.Include) > 0 || len(source.Exclude) > 0 {
|
||||
return
|
||||
}
|
||||
|
||||
switch {
|
||||
case strings.Contains(host, "learn.microsoft.com"):
|
||||
source.Include = []string{`/dotnet/`, `/csharp/`, `/base-types/`}
|
||||
source.Exclude = []string{`/previous-versions/`, `/answers/`, `/support/`, `/training/`, `/events/`, `/products/`}
|
||||
case strings.Contains(host, "kotlinlang.org"):
|
||||
source.Include = []string{`/docs/`}
|
||||
source.Exclude = []string{`/community/`, `/api/`, `/releases/`}
|
||||
case strings.Contains(host, "php.net"):
|
||||
source.Include = []string{`/manual/en/`}
|
||||
source.Exclude = []string{`/manual/(de|fr|es|ja|ru|pt)/`, `/downloads.php`, `/bugs.php`}
|
||||
case strings.Contains(host, "ruby-doc.org"):
|
||||
source.Include = []string{`/core/`}
|
||||
source.Exclude = []string{`/stdlib/`, `/gems/`}
|
||||
case strings.Contains(host, "hexdocs.pm"):
|
||||
source.Include = []string{`/elixir/`}
|
||||
source.Exclude = []string{`/phoenix/`, `/ecto/`}
|
||||
case strings.Contains(host, "nextjs.org"):
|
||||
source.Include = []string{`/docs/`}
|
||||
source.Exclude = []string{`/showcase`, `/blog`, `/learn/`, `/pricing`}
|
||||
case strings.Contains(host, "svelte.dev"):
|
||||
source.Include = []string{`/docs/`}
|
||||
source.Exclude = []string{`/playground`, `/tutorial`, `/blog`}
|
||||
case strings.Contains(host, "angular.dev"):
|
||||
source.Include = []string{`/guide/`, `/api/`, `/tutorials/`}
|
||||
source.Exclude = []string{`/resources/`, `/playground`}
|
||||
case strings.Contains(host, "remix.run"):
|
||||
source.Include = []string{`/docs/`}
|
||||
source.Exclude = []string{`/blog`, `/conf`, `/merch`}
|
||||
case strings.Contains(host, "solidjs.com"):
|
||||
source.Include = []string{`/docs/`}
|
||||
source.Exclude = []string{`/community`, `/showcase`, `/blog`}
|
||||
case strings.Contains(host, "expressjs.com"):
|
||||
source.Include = []string{`/en/(guide|api|advanced)/`}
|
||||
source.Exclude = []string{`/en/starter/`, `/cn/`, `/fr/`, `/es/`, `/de/`}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
appconfig "github.com/yourorg/devour/internal/config"
|
||||
)
|
||||
|
||||
func TestScrapeFromConfig(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "text/html")
|
||||
_, _ = w.Write([]byte("<html><head><title>Docs</title></head><body><main>" + strings.Repeat("docs content ", 30) + "</main></body></html>"))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
tmp := t.TempDir()
|
||||
cfg := appconfig.Default()
|
||||
cfg.Storage.DocsDir = filepath.Join(tmp, "docs")
|
||||
cfg.Storage.IndexDir = filepath.Join(tmp, "index")
|
||||
cfg.Storage.MetadataDir = filepath.Join(tmp, "metadata")
|
||||
cfg.Storage.CacheDir = filepath.Join(tmp, "cache")
|
||||
if err := cfg.EnsureStorageDirs(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
sourcesPath := filepath.Join(tmp, "sources.yaml")
|
||||
yaml := "- name: demo\n type: url\n url: " + srv.URL + "\n"
|
||||
if err := os.WriteFile(sourcesPath, []byte(yaml), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
oldFormat, oldOutput, oldAllow := scrapeFormat, scrapeOutput, scrapeAllowEmpty
|
||||
scrapeFormat = "json"
|
||||
scrapeOutput = cfg.Storage.DocsDir
|
||||
scrapeAllowEmpty = false
|
||||
defer func() {
|
||||
scrapeFormat, scrapeOutput, scrapeAllowEmpty = oldFormat, oldOutput, oldAllow
|
||||
}()
|
||||
|
||||
if err := scrapeFromConfig(nil, cfg, sourcesPath); err != nil {
|
||||
t.Fatalf("scrapeFromConfig failed: %v", err)
|
||||
}
|
||||
|
||||
entries, err := os.ReadDir(cfg.Storage.DocsDir)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(entries) == 0 {
|
||||
t.Fatal("expected scraped files")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/yourorg/devour/internal/scraper"
|
||||
)
|
||||
|
||||
func TestDetectSourceType(t *testing.T) {
|
||||
tests := []struct {
|
||||
url string
|
||||
wantType scraper.SourceType
|
||||
}{
|
||||
{"https://pkg.go.dev/net/http", scraper.SourceTypeGoDocs},
|
||||
{"https://docs.rs/tokio/latest/tokio/", scraper.SourceTypeRustDocs},
|
||||
{"https://docs.python.org/3/library/asyncio.html", scraper.SourceTypePythonDocs},
|
||||
{"https://docs.oracle.com/javase/8/docs/api/java/util/List.html", scraper.SourceTypeJavaDocs},
|
||||
{"https://docs.spring.io/spring-boot/docs/current/reference/htmlsingle/", scraper.SourceTypeSpringDocs},
|
||||
{"https://www.typescriptlang.org/docs/handbook/2/basic-types.html", scraper.SourceTypeTSDocs},
|
||||
{"https://react.dev/reference/react", scraper.SourceTypeReactDocs},
|
||||
{"https://vuejs.org/guide/introduction.html", scraper.SourceTypeVueDocs},
|
||||
{"https://nuxt.com/docs/guide/directory-structure", scraper.SourceTypeNuxtDocs},
|
||||
{"https://docs.docker.com/compose", scraper.SourceTypeDockerDocs},
|
||||
{"https://hub.docker.com/mcp/server/github", scraper.SourceTypeMCPDocs},
|
||||
{"https://developers.cloudflare.com/workers", scraper.SourceTypeCloudflareDocs},
|
||||
{"https://docs.astro.build/en/guides/components/", scraper.SourceTypeAstroDocs},
|
||||
{"https://github.com/yourorg/devour", scraper.SourceTypeGitHub},
|
||||
{"https://example.com/docs", scraper.SourceTypeWeb},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.url, func(t *testing.T) {
|
||||
got := detectSourceType(tt.url)
|
||||
if got != tt.wantType {
|
||||
t.Fatalf("detectSourceType(%q) = %q, want %q", tt.url, got, tt.wantType)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
+185
-27
@@ -1,25 +1,29 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/yourorg/devour/internal/projectstate"
|
||||
"github.com/yourorg/devour/internal/scraper"
|
||||
"github.com/yourorg/devour/internal/search"
|
||||
"github.com/yourorg/devour/internal/server"
|
||||
)
|
||||
|
||||
var serveCmd = &cobra.Command{
|
||||
Use: "serve",
|
||||
Short: "Start the MCP server",
|
||||
Long: `Start the Devour MCP server.
|
||||
Short: "Start the local Devour RPC server",
|
||||
Long: `Start the Devour RPC server.
|
||||
|
||||
In local mode (default), the server communicates via stdio, making it
|
||||
suitable for use as an OpenCode skill.
|
||||
|
||||
In remote mode (--remote flag), the server listens on HTTP and exposes
|
||||
a REST API for multi-user access.
|
||||
Local mode (default): JSON-RPC over stdin/stdout for agent/skill integration.
|
||||
Remote mode (--remote): experimental HTTP RPC endpoint at /rpc.
|
||||
|
||||
Examples:
|
||||
devour serve # Local mode (stdio)
|
||||
devour serve --remote # Remote mode on default port
|
||||
devour serve
|
||||
devour serve --remote
|
||||
devour serve --remote --port 3000`,
|
||||
RunE: runServe,
|
||||
}
|
||||
@@ -31,31 +35,185 @@ var (
|
||||
)
|
||||
|
||||
func init() {
|
||||
serveCmd.Flags().BoolVar(&serveRemote, "remote", false, "run as remote HTTP server")
|
||||
serveCmd.Flags().BoolVar(&serveRemote, "remote", false, "run as remote HTTP server (experimental)")
|
||||
serveCmd.Flags().IntVarP(&servePort, "port", "p", 8080, "HTTP port (remote mode only)")
|
||||
serveCmd.Flags().StringVar(&serveHost, "host", "localhost", "HTTP host (remote mode only)")
|
||||
}
|
||||
|
||||
func runServe(cmd *cobra.Command, args []string) error {
|
||||
if serveRemote {
|
||||
fmt.Printf("🚀 Starting Devour server in remote mode\n")
|
||||
fmt.Printf(" Host: %s\n", serveHost)
|
||||
fmt.Printf(" Port: %d\n", servePort)
|
||||
fmt.Printf(" URL: http://%s:%d\n", serveHost, servePort)
|
||||
|
||||
// TODO: Start HTTP MCP server
|
||||
return fmt.Errorf("remote mode not yet implemented")
|
||||
if _, err := loadAppConfig(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Println("🚀 Starting Devour server in local mode (stdio)")
|
||||
fmt.Println(" Communicating via JSON-RPC over stdin/stdout")
|
||||
srvCfg := &server.Config{
|
||||
Mode: "local",
|
||||
Transport: "stdio",
|
||||
Host: serveHost,
|
||||
Port: servePort,
|
||||
Handler: func(ctx context.Context, method string, params json.RawMessage) (any, error) {
|
||||
return handleServeMethod(ctx, method, params)
|
||||
},
|
||||
}
|
||||
|
||||
// TODO: Start stdio MCP server
|
||||
// Should handle JSON-RPC messages for:
|
||||
// - devour_query
|
||||
// - devour_add
|
||||
// - devour_status
|
||||
// - devour_sync
|
||||
if serveRemote {
|
||||
srvCfg.Mode = "remote"
|
||||
fmt.Printf("🚀 Starting Devour RPC server in remote experimental mode\n")
|
||||
fmt.Printf(" URL: http://%s:%d/rpc\n", serveHost, servePort)
|
||||
} else {
|
||||
fmt.Println("🚀 Starting Devour RPC server in local mode (stdio)")
|
||||
fmt.Println(" Protocol: JSON-RPC 2.0 over stdin/stdout")
|
||||
}
|
||||
|
||||
return fmt.Errorf("local mode not yet implemented")
|
||||
srv := server.NewServer(srvCfg)
|
||||
return srv.Start(context.Background())
|
||||
}
|
||||
|
||||
func handleServeMethod(ctx context.Context, method string, params json.RawMessage) (any, error) {
|
||||
// The method implementation needs full typed config. Load per-call to avoid stale state.
|
||||
loadedCfg, err := loadAppConfig()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch strings.TrimSpace(method) {
|
||||
case "devour_query":
|
||||
var req struct {
|
||||
Query string `json:"query"`
|
||||
Limit int `json:"limit"`
|
||||
Threshold float64 `json:"threshold"`
|
||||
}
|
||||
if len(params) > 0 {
|
||||
_ = json.Unmarshal(params, &req)
|
||||
}
|
||||
engine := search.NewEngine(loadedCfg)
|
||||
results, stats, err := engine.Search(ctx, req.Query, search.SearchOptions{Limit: req.Limit, Threshold: req.Threshold})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return map[string]any{"query": req.Query, "count": len(results), "results": results, "indexed": stats.Documents}, nil
|
||||
|
||||
case "devour_status":
|
||||
docsStats, err := projectstate.CollectDocsStats(loadedCfg.Storage.DocsDir)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
state, _ := projectstate.LoadSourceState(loadedCfg.Storage.MetadataDir)
|
||||
engine := search.NewEngine(loadedCfg)
|
||||
idxStats, _ := engine.EnsureIndexed(ctx)
|
||||
return map[string]any{
|
||||
"documents": docsStats.DocumentCount,
|
||||
"storage_bytes": docsStats.StorageBytes,
|
||||
"last_updated": docsStats.LastUpdated,
|
||||
"sources": state.Sources,
|
||||
"indexed_docs": idxStats.Documents,
|
||||
"index_timestamp": idxStats.LastIndexedAt,
|
||||
}, nil
|
||||
|
||||
case "devour_scrape":
|
||||
var req struct {
|
||||
Source string `json:"source"`
|
||||
Type string `json:"type"`
|
||||
Format string `json:"format"`
|
||||
Output string `json:"output"`
|
||||
Query string `json:"query"`
|
||||
ResultLimit int `json:"result_limit"`
|
||||
Domains []string `json:"domains"`
|
||||
Include []string `json:"include"`
|
||||
Exclude []string `json:"exclude"`
|
||||
}
|
||||
if err := json.Unmarshal(params, &req); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if strings.TrimSpace(req.Source) == "" {
|
||||
return nil, fmt.Errorf("source is required")
|
||||
}
|
||||
st := scraper.SourceType(req.Type)
|
||||
if st == "" {
|
||||
st = detectSourceType(req.Source)
|
||||
}
|
||||
source := &scraper.Source{
|
||||
Name: extractName(req.Source),
|
||||
Type: st,
|
||||
URL: req.Source,
|
||||
Query: strings.TrimSpace(req.Query),
|
||||
ResultLimit: req.ResultLimit,
|
||||
Domains: append([]string(nil), req.Domains...),
|
||||
Include: append([]string(nil), req.Include...),
|
||||
Exclude: append([]string(nil), req.Exclude...),
|
||||
}
|
||||
if st == scraper.SourceTypeLocal {
|
||||
source.Path = req.Source
|
||||
}
|
||||
applySourceProfile(source)
|
||||
prevFormat := scrapeFormat
|
||||
prevOutput := scrapeOutput
|
||||
prevAllowEmpty := scrapeAllowEmpty
|
||||
scrapeFormat = coalesceString(req.Format, "json")
|
||||
scrapeOutput = req.Output
|
||||
scrapeAllowEmpty = false
|
||||
count, err := scrapeOne(nil, loadedCfg, source, resolveOutputDir(loadedCfg, req.Output))
|
||||
scrapeFormat = prevFormat
|
||||
scrapeOutput = prevOutput
|
||||
scrapeAllowEmpty = prevAllowEmpty
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return map[string]any{"source": req.Source, "type": st, "documents": count}, nil
|
||||
|
||||
case "devour_ask":
|
||||
var req struct {
|
||||
Question string `json:"question"`
|
||||
Limit int `json:"limit"`
|
||||
}
|
||||
if err := json.Unmarshal(params, &req); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if strings.TrimSpace(req.Question) == "" {
|
||||
return nil, fmt.Errorf("question is required")
|
||||
}
|
||||
limit := req.Limit
|
||||
if limit <= 0 {
|
||||
limit = 5
|
||||
}
|
||||
engine := search.NewEngine(loadedCfg)
|
||||
results, _, err := engine.Search(ctx, req.Question, search.SearchOptions{Limit: limit})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
summary := "No relevant docs found."
|
||||
if len(results) > 0 {
|
||||
summary = results[0].Snippet
|
||||
}
|
||||
return map[string]any{"question": req.Question, "summary": summary, "sources": results}, nil
|
||||
|
||||
case "devour_sync":
|
||||
prevForce, prevRebuild, prevSource := syncForce, syncRebuild, syncSource
|
||||
var req struct {
|
||||
Source string `json:"source"`
|
||||
Force bool `json:"force"`
|
||||
Rebuild bool `json:"rebuild"`
|
||||
}
|
||||
if len(params) > 0 {
|
||||
_ = json.Unmarshal(params, &req)
|
||||
}
|
||||
syncForce = req.Force
|
||||
syncRebuild = req.Rebuild
|
||||
syncSource = req.Source
|
||||
err := runSync(nil, nil)
|
||||
syncForce, syncRebuild, syncSource = prevForce, prevRebuild, prevSource
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return map[string]any{"ok": true}, nil
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown method: %s", method)
|
||||
}
|
||||
}
|
||||
|
||||
// coalesceString returns primary unless it is empty or whitespace-only, in
// which case fallback is returned. primary is returned as given, untrimmed.
func coalesceString(primary, fallback string) string {
	if strings.TrimSpace(primary) == "" {
		return fallback
	}
	return primary
}
|
||||
|
||||
+86
-17
@@ -1,10 +1,13 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/yourorg/devour/internal/projectstate"
|
||||
"github.com/yourorg/devour/internal/search"
|
||||
"github.com/yourorg/devour/internal/ui"
|
||||
)
|
||||
|
||||
@@ -23,39 +26,105 @@ Shows:
|
||||
}
|
||||
|
||||
func runStatus(cmd *cobra.Command, args []string) error {
|
||||
cfg, err := loadAppConfig()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Print the small character mascot
|
||||
ui.PrintCharacterSmall()
|
||||
fmt.Println()
|
||||
|
||||
ui.PrintHeader("Devour Status")
|
||||
|
||||
// TODO: Implement actual status check
|
||||
// Check:
|
||||
// - Index existence and health
|
||||
// - Document count
|
||||
// - Vector count
|
||||
// - Last sync time
|
||||
// - Source status
|
||||
docsStats, err := projectstate.CollectDocsStats(cfg.Storage.DocsDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Placeholder status
|
||||
ui.PrintKeyValue("Index Health", "⚠️ Not initialized")
|
||||
ui.PrintKeyValue("Documents", "0 indexed")
|
||||
ui.PrintKeyValue("Chunks", "0 total")
|
||||
ui.PrintKeyValue("Vector Dimension", "1536")
|
||||
ui.PrintKeyValue("Last Updated", "Never")
|
||||
ui.PrintKeyValue("Storage Used", "0 MB")
|
||||
engine := search.NewEngine(cfg)
|
||||
indexStats, indexErr := engine.EnsureIndexed(context.Background())
|
||||
indexHealth := "✓ Healthy"
|
||||
if indexErr != nil {
|
||||
if docsStats.DocumentCount == 0 {
|
||||
indexHealth = "⚠️ No docs indexed yet"
|
||||
} else {
|
||||
indexHealth = "✗ Index error"
|
||||
}
|
||||
}
|
||||
|
||||
lastUpdated := "Never"
|
||||
if !docsStats.LastUpdated.IsZero() {
|
||||
lastUpdated = docsStats.LastUpdated.Format(time.RFC3339)
|
||||
}
|
||||
|
||||
chunks := 0
|
||||
if indexStats != nil {
|
||||
chunks = indexStats.Documents
|
||||
}
|
||||
|
||||
ui.PrintKeyValue("Index Health", indexHealth)
|
||||
ui.PrintKeyValue("Documents", fmt.Sprintf("%d indexed", docsStats.DocumentCount))
|
||||
ui.PrintKeyValue("Chunks", fmt.Sprintf("%d total", chunks))
|
||||
ui.PrintKeyValue("Vector Dimension", fmt.Sprintf("%d", cfg.Embeddings.Dimensions))
|
||||
ui.PrintKeyValue("Last Updated", lastUpdated)
|
||||
ui.PrintKeyValue("Storage Used", humanSize(docsStats.StorageBytes))
|
||||
|
||||
fmt.Println()
|
||||
ui.PrintSection("Sources")
|
||||
ui.PrintInfo(" None configured")
|
||||
state, stateErr := projectstate.LoadSourceState(cfg.Storage.MetadataDir)
|
||||
if stateErr != nil || len(state.Sources) == 0 {
|
||||
ui.PrintInfo(" None tracked yet")
|
||||
} else {
|
||||
keys := make([]string, 0, len(state.Sources))
|
||||
for k := range state.Sources {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sortStrings(keys)
|
||||
for _, k := range keys {
|
||||
s := state.Sources[k]
|
||||
last := "never"
|
||||
if !s.LastSync.IsZero() {
|
||||
last = s.LastSync.Format("2006-01-02 15:04:05")
|
||||
}
|
||||
fmt.Printf(" • %s (%s): %d docs, last sync %s\n", s.Name, s.Type, s.DocCount, last)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
ui.PrintSection("Next Steps")
|
||||
fmt.Println(" 1. Run 'devour init' to initialize")
|
||||
fmt.Println(" 2. Run 'devour scrape <source>' to index documents")
|
||||
if docsStats.DocumentCount == 0 {
|
||||
fmt.Println(" 1. Run 'devour scrape <source>' to index documentation")
|
||||
fmt.Println(" 2. Run 'devour query \"<topic>\"' to search indexed docs")
|
||||
} else {
|
||||
fmt.Println(" 1. Run 'devour query \"<topic>\"' for local docs search")
|
||||
fmt.Println(" 2. Run 'devour ask --lang <lang> \"<question>\"' for structured answers")
|
||||
}
|
||||
if indexErr != nil {
|
||||
fmt.Printf(" ⚠️ Index note: %v\n", indexErr)
|
||||
}
|
||||
|
||||
// Show when check happened
|
||||
fmt.Printf("\nStatus as of: %s\n", time.Now().Format(time.RFC3339))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// humanSize formats a byte count for display.
//
// Values of one mebibyte or more are shown in MB with two decimals; smaller
// values are shown as whole KB using integer division (so anything under
// 1024 bytes prints as "0 KB").
func humanSize(b int64) string {
	const bytesPerMB = 1024 * 1024
	if b >= bytesPerMB {
		return fmt.Sprintf("%.2f MB", float64(b)/float64(bytesPerMB))
	}
	return fmt.Sprintf("%d KB", b/1024)
}
|
||||
|
||||
// sortStrings sorts values in place into ascending order using an insertion
// sort. Slices with fewer than two elements are left untouched.
func sortStrings(values []string) {
	for i := 1; i < len(values); i++ {
		// Lift the current element out, shift every larger predecessor one
		// slot to the right, then drop the element into the gap.
		current := values[i]
		slot := i
		for slot > 0 && current < values[slot-1] {
			values[slot] = values[slot-1]
			slot--
		}
		values[slot] = current
	}
}
|
||||
|
||||
+157
-17
@@ -1,9 +1,18 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/yourorg/devour/internal/projectstate"
|
||||
"github.com/yourorg/devour/internal/scraper"
|
||||
"github.com/yourorg/devour/internal/search"
|
||||
"github.com/yourorg/devour/internal/storage"
|
||||
)
|
||||
|
||||
var syncCmd = &cobra.Command{
|
||||
@@ -12,7 +21,7 @@ var syncCmd = &cobra.Command{
|
||||
Long: `Fetch updates from all configured sources.
|
||||
|
||||
Checks each source for changes (using hash or timestamp comparison)
|
||||
and updates the index accordingly.
|
||||
and updates the local docs + index accordingly.
|
||||
|
||||
Examples:
|
||||
devour sync # Sync all sources
|
||||
@@ -34,29 +43,160 @@ func init() {
|
||||
}
|
||||
|
||||
func runSync(cmd *cobra.Command, args []string) error {
|
||||
cfg, err := loadAppConfig()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if syncRebuild {
|
||||
fmt.Println("🔄 Rebuilding index from all sources...")
|
||||
fmt.Println("🔄 Rebuilding local index from configured sources...")
|
||||
} else {
|
||||
fmt.Println("🔄 Syncing with configured sources...")
|
||||
fmt.Println("🔄 Syncing configured sources...")
|
||||
}
|
||||
|
||||
if syncSource != "" {
|
||||
fmt.Printf(" Source: %s\n", syncSource)
|
||||
if len(cfg.Sources) == 0 {
|
||||
fmt.Println("No sources configured. Add sources in devour.yaml first.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO: Implement actual sync logic
|
||||
// 1. Load sources from config
|
||||
// 2. For each source:
|
||||
// a. Check for changes (hash/timestamp)
|
||||
// b. If changes detected or --force:
|
||||
// - Scrape updated content
|
||||
// - Re-generate embeddings
|
||||
// - Update index
|
||||
// 3. Update metadata
|
||||
state, err := projectstate.LoadSourceState(cfg.Storage.MetadataDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
fmt.Println("⚠️ Sync functionality not yet implemented")
|
||||
fmt.Println(" Configure sources in devour.yaml first")
|
||||
updated := 0
|
||||
skipped := 0
|
||||
failed := 0
|
||||
totalDocs := 0
|
||||
|
||||
for _, srcCfg := range cfg.Sources {
|
||||
if syncSource != "" && srcCfg.Name != syncSource {
|
||||
continue
|
||||
}
|
||||
|
||||
source := sourceFromConfig(srcCfg)
|
||||
if source.Type == "" {
|
||||
if source.URL != "" {
|
||||
source.Type = detectSourceType(source.URL)
|
||||
} else if source.Path != "" {
|
||||
source.Type = scraper.SourceTypeLocal
|
||||
}
|
||||
}
|
||||
if source.Name == "" {
|
||||
source.Name = extractName(source.URL)
|
||||
}
|
||||
applySourceProfile(source)
|
||||
|
||||
fmt.Printf("\n• %s (%s)\n", source.Name, source.Type)
|
||||
s := scraper.NewScraper(source.Type, toScraperConfig(cfg, 0))
|
||||
if s == nil {
|
||||
failed++
|
||||
fmt.Printf(" ✗ unsupported source type: %s\n", source.Type)
|
||||
continue
|
||||
}
|
||||
|
||||
key := source.Name
|
||||
if key == "" {
|
||||
key = chooseSourceLabel(source)
|
||||
}
|
||||
lastHash := ""
|
||||
if prev := state.Sources[key]; prev != nil {
|
||||
lastHash = prev.Hash
|
||||
}
|
||||
|
||||
needsUpdate := syncForce || syncRebuild
|
||||
newHash := lastHash
|
||||
if !needsUpdate {
|
||||
changed, hash, detectErr := s.DetectChanges(context.Background(), source, lastHash)
|
||||
if detectErr != nil {
|
||||
fmt.Printf(" ⚠ change detection failed (%v), scraping anyway\n", detectErr)
|
||||
needsUpdate = true
|
||||
} else {
|
||||
needsUpdate = changed
|
||||
newHash = hash
|
||||
}
|
||||
}
|
||||
|
||||
if !needsUpdate {
|
||||
skipped++
|
||||
fmt.Println(" ✓ no changes")
|
||||
continue
|
||||
}
|
||||
|
||||
docs, scrapeErr := s.Scrape(context.Background(), source)
|
||||
if scrapeErr != nil {
|
||||
failed++
|
||||
fmt.Printf(" ✗ scrape failed: %v\n", scrapeErr)
|
||||
state.Sources[key] = &projectstate.SourceState{
|
||||
Name: source.Name,
|
||||
Type: string(source.Type),
|
||||
URL: source.URL,
|
||||
Hash: lastHash,
|
||||
LastSync: time.Now(),
|
||||
DocCount: 0,
|
||||
LastError: scrapeErr.Error(),
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
saved, saveErr := storage.SaveDocuments(docs, storage.SaveOptions{
|
||||
Format: "json",
|
||||
OutputDir: cfg.Storage.DocsDir,
|
||||
AllowEmpty: false,
|
||||
PrintWriter: nil,
|
||||
})
|
||||
if saveErr != nil {
|
||||
failed++
|
||||
fmt.Printf(" ✗ save failed: %v\n", saveErr)
|
||||
continue
|
||||
}
|
||||
|
||||
if newHash == "" {
|
||||
h := sha256.New()
|
||||
for _, d := range docs {
|
||||
if d == nil {
|
||||
continue
|
||||
}
|
||||
fmt.Fprintf(h, "%s|%s|%s\n", d.ID, d.Hash, d.URL)
|
||||
}
|
||||
newHash = hex.EncodeToString(h.Sum(nil))
|
||||
}
|
||||
|
||||
state.Sources[key] = &projectstate.SourceState{
|
||||
Name: source.Name,
|
||||
Type: string(source.Type),
|
||||
URL: source.URL,
|
||||
Hash: newHash,
|
||||
LastSync: time.Now(),
|
||||
DocCount: saved.Count,
|
||||
LastError: "",
|
||||
}
|
||||
|
||||
updated++
|
||||
totalDocs += saved.Count
|
||||
fmt.Printf(" ✓ updated (%d docs)\n", saved.Count)
|
||||
}
|
||||
|
||||
if err := projectstate.SaveSourceState(cfg.Storage.MetadataDir, state); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if syncRebuild || updated > 0 {
|
||||
engine := search.NewEngine(cfg)
|
||||
if _, err := engine.Rebuild(context.Background()); err != nil {
|
||||
return fmt.Errorf("rebuild index: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("\nSync summary: updated=%d skipped=%d failed=%d docs=%d\n", updated, skipped, failed, totalDocs)
|
||||
if failed > 0 {
|
||||
return fmt.Errorf("sync completed with failures")
|
||||
}
|
||||
if syncSource != "" && updated == 0 && skipped == 0 && failed == 0 {
|
||||
return fmt.Errorf("source %q not found in config", syncSource)
|
||||
}
|
||||
if strings.TrimSpace(syncSource) != "" {
|
||||
fmt.Printf("Synced source: %s\n", syncSource)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
+169
@@ -0,0 +1,169 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/yourorg/devour/internal/scraper"
|
||||
)
|
||||
|
||||
// Flag values for `devour verify smoke`; they are bound to the --format and
// --timeout flags in this file's init function.
var (
	// verifyFormat selects the output format ("text" or "json").
	verifyFormat string
	// verifyTimeout is the per-case timeout in seconds.
	verifyTimeout int
)
|
||||
|
||||
var verifyCmd = &cobra.Command{
|
||||
Use: "verify",
|
||||
Short: "Run Devour verification suites",
|
||||
Long: `Run deterministic and live verification suites for Devour commands and scrapers.`,
|
||||
}
|
||||
|
||||
var verifySmokeCmd = &cobra.Command{
|
||||
Use: "smoke",
|
||||
Short: "Run live docs scraping smoke checks",
|
||||
Long: `Run an opt-in live network smoke suite and persist a machine-readable report under devour_data/verify/.`,
|
||||
RunE: runVerifySmoke,
|
||||
}
|
||||
|
||||
func init() {
|
||||
verifyCmd.AddCommand(verifySmokeCmd)
|
||||
verifySmokeCmd.Flags().StringVar(&verifyFormat, "format", "text", "output format (text, json)")
|
||||
verifySmokeCmd.Flags().IntVar(&verifyTimeout, "timeout", 90, "timeout per smoke case in seconds")
|
||||
}
|
||||
|
||||
type verifyCase struct {
|
||||
Name string `json:"name"`
|
||||
Type scraper.SourceType `json:"type"`
|
||||
URL string `json:"url"`
|
||||
Passed bool `json:"passed"`
|
||||
Docs int `json:"docs"`
|
||||
Error string `json:"error,omitempty"`
|
||||
TookMs int64 `json:"took_ms"`
|
||||
}
|
||||
|
||||
type verifyReport struct {
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
Duration string `json:"duration"`
|
||||
Passed int `json:"passed"`
|
||||
Failed int `json:"failed"`
|
||||
Cases []verifyCase `json:"cases"`
|
||||
}
|
||||
|
||||
func runVerifySmoke(cmd *cobra.Command, args []string) error {
|
||||
cfg, err := loadAppConfig()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if verifyTimeout <= 0 {
|
||||
verifyTimeout = 90
|
||||
}
|
||||
|
||||
cases := []verifyCase{
|
||||
{Name: "Go net/http", Type: scraper.SourceTypeGoDocs, URL: "https://pkg.go.dev/net/http"},
|
||||
{Name: "Python asyncio", Type: scraper.SourceTypePythonDocs, URL: "https://docs.python.org/3/library/asyncio.html"},
|
||||
{Name: "React reference", Type: scraper.SourceTypeReactDocs, URL: "https://react.dev/reference/react"},
|
||||
{Name: "TypeScript handbook", Type: scraper.SourceTypeTSDocs, URL: "https://www.typescriptlang.org/docs/handbook/2/basic-types.html"},
|
||||
{Name: "Next.js docs", Type: scraper.SourceTypeWeb, URL: "https://nextjs.org/docs"},
|
||||
{Name: "Svelte docs", Type: scraper.SourceTypeWeb, URL: "https://svelte.dev/docs/kit"},
|
||||
{Name: "Angular guide", Type: scraper.SourceTypeWeb, URL: "https://angular.dev/guide/http"},
|
||||
{Name: "Remix docs", Type: scraper.SourceTypeWeb, URL: "https://v2.remix.run/docs"},
|
||||
{Name: "Solid docs repo", Type: scraper.SourceTypeGitHub, URL: "https://github.com/solidjs/solid-docs"},
|
||||
{Name: "Express guide", Type: scraper.SourceTypeWeb, URL: "https://expressjs.com/en/guide/routing.html"},
|
||||
}
|
||||
|
||||
startAll := time.Now()
|
||||
passed := 0
|
||||
failed := 0
|
||||
|
||||
for i := range cases {
|
||||
c := &cases[i]
|
||||
caseStart := time.Now()
|
||||
s := scraper.NewScraper(c.Type, toScraperConfig(cfg, 4))
|
||||
if s == nil {
|
||||
c.Error = "scraper not available"
|
||||
c.Passed = false
|
||||
failed++
|
||||
continue
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(verifyTimeout)*time.Second)
|
||||
docs, err := s.Scrape(ctx, &scraper.Source{Name: c.Name, Type: c.Type, URL: c.URL})
|
||||
cancel()
|
||||
c.TookMs = time.Since(caseStart).Milliseconds()
|
||||
|
||||
if err != nil {
|
||||
c.Error = err.Error()
|
||||
c.Passed = false
|
||||
failed++
|
||||
continue
|
||||
}
|
||||
c.Docs = len(docs)
|
||||
if len(docs) == 0 {
|
||||
c.Error = "0 documents"
|
||||
c.Passed = false
|
||||
failed++
|
||||
continue
|
||||
}
|
||||
c.Passed = true
|
||||
passed++
|
||||
}
|
||||
|
||||
report := verifyReport{
|
||||
CreatedAt: time.Now(),
|
||||
Duration: time.Since(startAll).String(),
|
||||
Passed: passed,
|
||||
Failed: failed,
|
||||
Cases: cases,
|
||||
}
|
||||
|
||||
rootDataDir := filepath.Dir(cfg.Storage.DocsDir)
|
||||
verifyDir := filepath.Join(rootDataDir, "verify")
|
||||
if err := os.MkdirAll(verifyDir, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
filename := fmt.Sprintf("smoke-%s.json", time.Now().Format("20060102-150405"))
|
||||
reportPath := filepath.Join(verifyDir, filename)
|
||||
b, err := json.MarshalIndent(report, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.WriteFile(reportPath, b, 0o644); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
switch verifyFormat {
|
||||
case "json":
|
||||
enc := json.NewEncoder(cmd.OutOrStdout())
|
||||
enc.SetIndent("", " ")
|
||||
if err := enc.Encode(report); err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "Smoke verification complete\n")
|
||||
fmt.Fprintf(cmd.OutOrStdout(), " Passed: %d\n", report.Passed)
|
||||
fmt.Fprintf(cmd.OutOrStdout(), " Failed: %d\n", report.Failed)
|
||||
fmt.Fprintf(cmd.OutOrStdout(), " Report: %s\n", reportPath)
|
||||
for _, c := range report.Cases {
|
||||
status := "PASS"
|
||||
if !c.Passed {
|
||||
status = "FAIL"
|
||||
}
|
||||
fmt.Fprintf(cmd.OutOrStdout(), " - [%s] %s (%d docs, %dms)", status, c.Name, c.Docs, c.TookMs)
|
||||
if c.Error != "" {
|
||||
fmt.Fprintf(cmd.OutOrStdout(), " error=%s", c.Error)
|
||||
}
|
||||
fmt.Fprintln(cmd.OutOrStdout())
|
||||
}
|
||||
}
|
||||
|
||||
if report.Failed > 0 {
|
||||
return fmt.Errorf("smoke verification completed with failures")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user