This commit is contained in:
Tomas Dvorak
2026-02-24 10:33:59 +01:00
parent 409acd2e08
commit 898a3c303f
1374 changed files with 290409 additions and 29187 deletions
+1012
View File
File diff suppressed because it is too large Load Diff
+144
View File
@@ -0,0 +1,144 @@
package cmd
import (
"strings"
"testing"
"github.com/yourorg/devour/internal/scraper"
)
// TestDeriveSearchTerms verifies that a Go question mentioning regex and
// HTTP produces search terms naming the regexp and net/http packages.
func TestDeriveSearchTerms(t *testing.T) {
	got := deriveSearchTerms("go", "how to regex match http path")
	if len(got) == 0 {
		t.Fatal("expected at least one derived search term")
	}

	// Substring checks run against the comma-joined list, so a term only has
	// to contain the package name rather than equal it.
	flat := strings.Join(got, ",")

	hasRegexp := strings.Contains(flat, "regexp")
	if !hasRegexp {
		t.Fatalf("expected regexp term in %v", got)
	}

	hasHTTP := strings.Contains(flat, "net/http")
	if !hasHTTP {
		t.Fatalf("expected net/http term in %v", got)
	}
}
// TestScoreDocument verifies that a document about regexp scores strictly
// higher than an unrelated one for a regex query.
func TestScoreDocument(t *testing.T) {
	const query = "regex match in go"

	relevant := &scraper.Document{
		Title:   "Package regexp",
		Content: "Use MustCompile and MatchString to match values.",
		Type:    "go-package",
		URL:     "https://pkg.go.dev/regexp",
	}
	unrelated := &scraper.Document{
		Title:   "Package archive/tar",
		Content: "Read and write tar archives.",
		Type:    "go-package",
		URL:     "https://pkg.go.dev/archive/tar",
	}

	relevantScore := scoreDocument(query, relevant)
	unrelatedScore := scoreDocument(query, unrelated)
	if relevantScore <= unrelatedScore {
		t.Fatal("expected regex-related document to have a higher score")
	}
}
func TestExtractRecommendedAPI(t *testing.T) {
docs := []rankedDoc{
{
doc: &scraper.Document{
Title: "regexp.func MustCompile ¶",
URL: "https://pkg.go.dev/regexp",
Content: "re := regexp.MustCompile(`\\\\d+`)\nif re.MatchString(input) { fmt.Println(\"ok\") }",
},
},
}
apis := extractRecommendedAPI(docs)
if len(apis) == 0 {
t.Fatal("expected API extraction to return at least one call")
}
}
// TestExtractSnippet verifies that a snippet extracted for the term "regexp"
// is non-empty and mentions the term (case-insensitively).
func TestExtractSnippet(t *testing.T) {
	text := "The regexp package implements regular expression search. Use MustCompile for fixed patterns."

	got := extractSnippet(text, []string{"regexp"})
	if got == "" {
		t.Fatal("expected non-empty snippet")
	}

	if lowered := strings.ToLower(got); !strings.Contains(lowered, "regexp") {
		t.Fatalf("snippet should mention regexp, got: %q", got)
	}
}
// TestCandidateDocURLs_FrameworkFallbacks checks the candidate URL lists
// returned for the nextjs, remix and solid frameworks.
func TestCandidateDocURLs_FrameworkFallbacks(t *testing.T) {
	// Next.js: a primary routing URL plus at least one fallback.
	nextURLs, err := candidateDocURLs("nextjs", "routing")
	if err != nil {
		t.Fatalf("candidateDocURLs(nextjs) error: %v", err)
	}
	if len(nextURLs) < 2 {
		t.Fatalf("expected fallback URLs for nextjs, got %v", nextURLs)
	}
	if primary := nextURLs[0]; primary != "https://nextjs.org/docs/app/building-your-application/routing" {
		t.Fatalf("unexpected primary nextjs URL: %q", primary)
	}

	// Remix: the first candidate must be the v2 routes convention page.
	remixURLs, err := candidateDocURLs("remix", "routes")
	if err != nil {
		t.Fatalf("candidateDocURLs(remix) error: %v", err)
	}
	if len(remixURLs) == 0 || remixURLs[0] != "https://v2.remix.run/docs/file-conventions/routes" {
		t.Fatalf("unexpected remix candidate URLs: %v", remixURLs)
	}

	// Solid: the first candidate must point at the solid-docs repository.
	solidURLs, err := candidateDocURLs("solid", "router")
	if err != nil {
		t.Fatalf("candidateDocURLs(solid) error: %v", err)
	}
	if len(solidURLs) == 0 || !strings.Contains(solidURLs[0], "github.com/solidjs/solid-docs") {
		t.Fatalf("unexpected solid candidate URLs: %v", solidURLs)
	}
}
// TestPrimaryQueryTokenSkipsQuestionWords verifies that the primary token of
// a question is non-empty and is neither "what" nor "does".
func TestPrimaryQueryTokenSkipsQuestionWords(t *testing.T) {
	got := primaryQueryToken("what does routing do in remix")

	switch got {
	case "":
		t.Fatal("expected non-empty token")
	case "what", "does":
		t.Fatalf("expected informative token, got %q", got)
	}
}
// TestDeriveSearchTermsSolidRouting verifies that a Solid routing question
// yields a solid-router term and does not yield a "signals" term.
func TestDeriveSearchTermsSolidRouting(t *testing.T) {
	got := deriveSearchTerms("solid", "how to do routing in solid")
	flat := strings.Join(got, ",")

	if hasRouter := strings.Contains(flat, "solid-router"); !hasRouter {
		t.Fatalf("expected solid-router term in %v", got)
	}
	if hasSignals := strings.Contains(flat, "signals"); hasSignals {
		t.Fatalf("did not expect signals default for routing question, got %v", got)
	}
}
func TestShouldFallbackToLive(t *testing.T) {
strong := []rankedDoc{
{
doc: &scraper.Document{Title: "Routing Guide", Content: "routing with file based routes", URL: "https://nextjs.org/docs/routing"},
score: 2.2,
},
}
if shouldFallbackToLive(strong, []string{"routing"}) {
t.Fatal("expected strong local match to skip live fallback")
}
weak := []rankedDoc{
{
doc: &scraper.Document{Title: "Misc", Content: "unrelated", URL: "https://example.com"},
score: 0.1,
},
}
if !shouldFallbackToLive(weak, []string{"routing"}) {
t.Fatal("expected weak local match to trigger live fallback")
}
}
+181
View File
@@ -0,0 +1,181 @@
package cmd
import (
"encoding/json"
"fmt"
"net/url"
"os"
"os/exec"
"sort"
"strings"
"unicode"
"github.com/spf13/cobra"
)
// Flag-backed options for the auto command. Each is bound to a command-line
// flag of autoCmd in init and read by runAuto.
var (
// autoDryRun (--dry-run): print the selected command without executing it.
autoDryRun bool
// autoJSON (--json): emit the route decision as JSON.
autoJSON bool
// autoLang (--lang): optional language override for the ask/get routes.
autoLang string
)
// autoCmd is the "auto" subcommand: it takes a free-form intent (one or more
// arguments, joined with spaces by runAuto) and routes it to another Devour
// command.
var autoCmd = &cobra.Command{
Use: "auto <intent>",
Short: "Route natural-language intent to the best Devour command",
Long: `Auto-classify intent and run the best matching command (get/scrape/ask/quality).
Examples:
devour auto "how to parse json in go"
devour auto "https://pkg.go.dev/net/http"
devour auto "check code quality" --dry-run
devour auto "what is useEffect" --lang react`,
// At least one argument is required; an all-whitespace intent is rejected in runAuto.
Args: cobra.MinimumNArgs(1),
RunE: runAuto,
}
// init binds the auto command's flags to their package-level variables.
func init() {
	flags := autoCmd.Flags()

	flags.BoolVar(&autoDryRun, "dry-run", false, "print selected command without executing")
	flags.BoolVar(&autoJSON, "json", false, "output route decision as JSON")
	flags.StringVar(&autoLang, "lang", "", "optional language override for ask/get routes")
}
// autoDecision is the result of classifying an intent: which route was
// chosen, why, and the argument list to pass to the devour executable.
// It is the value encoded when --json is set.
type autoDecision struct {
// Intent is the original intent text as given by the user.
Intent string `json:"intent"`
// Route is the selected subcommand name (scrape, quality, ask or get).
Route string `json:"route"`
// Reason is a short human-readable explanation of the choice.
Reason string `json:"reason"`
// Command holds the arguments that follow "devour" when the route is run.
Command []string `json:"command"`
}
// runAuto implements the auto command.
//
// It joins args into a single intent, classifies it, and then either
//   - encodes the decision as JSON and returns (when --json is set),
//   - prints the decision and returns (when --dry-run is set), or
//   - prints the decision and re-invokes the current executable with the
//     selected sub-command.
//
// All output, including the human-readable summary, goes to the command's
// configured writers so redirected output (cmd.SetOut) is not split between
// the writer and the process stdout. The child process inherits the
// command's stdin, stdout and stderr.
//
// It returns an error when the intent is empty, classification fails, the
// executable path cannot be determined, or the delegated command fails.
func runAuto(cmd *cobra.Command, args []string) error {
	intent := strings.TrimSpace(strings.Join(args, " "))
	if intent == "" {
		return fmt.Errorf("intent is required")
	}

	decision, err := classifyIntent(intent, strings.TrimSpace(autoLang))
	if err != nil {
		return err
	}

	out := cmd.OutOrStdout()

	if autoJSON {
		enc := json.NewEncoder(out)
		enc.SetIndent("", " ")
		return enc.Encode(decision)
	}

	fmt.Fprintf(out, "Route: %s\n", decision.Route)
	fmt.Fprintf(out, "Reason: %s\n", decision.Reason)
	fmt.Fprintf(out, "Command: devour %s\n", strings.Join(decision.Command, " "))

	if autoDryRun {
		return nil
	}

	exe, err := os.Executable()
	if err != nil {
		return fmt.Errorf("locating devour executable: %w", err)
	}

	run := exec.Command(exe, decision.Command...)
	// Without an explicit Stdin the child would read from the null device.
	run.Stdin = cmd.InOrStdin()
	run.Stdout = out
	run.Stderr = cmd.ErrOrStderr()
	if err := run.Run(); err != nil {
		// %w keeps *exec.ExitError reachable through errors.As.
		return fmt.Errorf("running devour %s: %w", strings.Join(decision.Command, " "), err)
	}
	return nil
}
// classifyIntent maps a free-form intent to the Devour command that should
// handle it. Rules are tried in order:
//
//  1. An absolute http(s) URL with a host routes to "scrape".
//  2. Text mentioning quality, technical debt, lint or code smell routes to
//     "quality" ("quality scan ." when it also mentions scan, otherwise
//     "quality status").
//  3. A question (contains "?" or the whole word how/why/what) routes to
//     "ask".
//  4. Everything else routes to "get".
//
// For the ask and get routes the language comes from langOverride when it is
// non-empty, otherwise it is inferred from the text; an unrecognised or
// missing language falls back to "go". On the get route, a keyword that is
// just the language's own name is replaced with "overview".
//
// The returned error is currently always nil.
func classifyIntent(intent, langOverride string) (*autoDecision, error) {
	lower := strings.ToLower(intent)
	trimmed := strings.TrimSpace(intent)

	// Require a host so inputs such as "http:" are not treated as a URL.
	if u, err := url.Parse(trimmed); err == nil && (u.Scheme == "http" || u.Scheme == "https") && u.Host != "" {
		route := []string{"scrape", trimmed}
		return &autoDecision{Intent: intent, Route: "scrape", Reason: "detected URL input", Command: route}, nil
	}

	if strings.Contains(lower, "quality") || strings.Contains(lower, "technical debt") || strings.Contains(lower, "lint") || strings.Contains(lower, "code smell") {
		route := []string{"quality", "status"}
		if strings.Contains(lower, "scan") {
			route = []string{"quality", "scan", "."}
		}
		return &autoDecision{Intent: intent, Route: "quality", Reason: "detected quality-analysis intent", Command: route}, nil
	}

	language := strings.TrimSpace(langOverride)
	if language == "" {
		language = inferLanguageFromText(lower)
	}
	// Both remaining routes default to Go when no supported language is known.
	if canonical, ok := normalizeLanguage(language); ok {
		language = canonical
	} else {
		language = "go"
	}

	if strings.Contains(lower, "?") || autoHasQuestionWord(lower) {
		route := []string{"ask", "--lang", language, intent, "--format", "text"}
		return &autoDecision{Intent: intent, Route: "ask", Reason: "question-style intent", Command: route}, nil
	}

	keyword := inferKeyword(intent)
	if canonical, ok := normalizeLanguage(keyword); ok && canonical == language {
		keyword = "overview"
	}
	route := []string{"get", language, keyword}
	return &autoDecision{Intent: intent, Route: "get", Reason: "default docs retrieval route", Command: route}, nil
}

// autoHasQuestionWord reports whether text contains "how", "why" or "what"
// as a whole word. Matching on word boundaries keeps inputs like "show me
// go docs" or "somewhat" from being mistaken for questions.
func autoHasQuestionWord(text string) bool {
	words := strings.FieldsFunc(text, func(r rune) bool {
		return !(unicode.IsLetter(r) || unicode.IsDigit(r))
	})
	for _, w := range words {
		switch w {
		case "how", "why", "what":
			return true
		}
	}
	return false
}
// inferLanguageFromText returns the language alias mentioned in text, or ""
// when none is found.
//
// "c#" and "next.js" are matched as substrings first, because the tokenizer
// below splits on '#' and '.'. Otherwise text is split into letter/digit
// tokens and compared against the keys of languageAliases, so an alias only
// matches a whole word. When several aliases are present the longest wins;
// ties are broken alphabetically so the result does not depend on map
// iteration order.
//
// The returned value is an alias key (or "csharp"/"nextjs" for the two
// special cases), not necessarily the canonical language name.
func inferLanguageFromText(text string) string {
	text = strings.ToLower(text)
	if strings.Contains(text, "c#") {
		return "csharp"
	}
	if strings.Contains(text, "next.js") {
		return "nextjs"
	}

	tokens := strings.FieldsFunc(text, func(r rune) bool {
		return !(unicode.IsLetter(r) || unicode.IsDigit(r))
	})

	// Collect each distinct token that is a known alias.
	known := languageAliases()
	seen := make(map[string]struct{}, len(tokens))
	var matches []string
	for _, tok := range tokens {
		if _, dup := seen[tok]; dup {
			continue
		}
		seen[tok] = struct{}{}
		if _, ok := known[tok]; ok {
			matches = append(matches, tok)
		}
	}
	if len(matches) == 0 {
		return ""
	}

	sort.Slice(matches, func(i, j int) bool {
		a, b := matches[i], matches[j]
		if len(a) != len(b) {
			return len(a) > len(b)
		}
		return a < b
	})
	return matches[0]
}
// inferKeyword picks the first meaningful word of intent to use as the
// documentation keyword. Words are lower-cased, stripped of surrounding
// ",.!?;:" punctuation, and skipped when they are shorter than two bytes or
// are filler words such as "get", "docs" or "please". When nothing remains
// it returns "overview".
func inferKeyword(intent string) string {
	for _, raw := range strings.Fields(strings.ToLower(intent)) {
		word := strings.Trim(raw, ",.!?;:")

		// Covers the empty string and one-byte words, including the filler "a".
		if len(word) < 2 {
			continue
		}

		switch word {
		case "get", "docs", "documentation", "about", "for", "on",
			"the", "an", "show", "me", "please":
			continue
		}
		return word
	}
	return "overview"
}
+31
View File
@@ -0,0 +1,31 @@
package cmd
import "testing"
// TestInferLanguageFromText_UsesTokenBoundaries verifies that a language is
// inferred from a whole-word mention and that plain text without a language
// word yields no language.
func TestInferLanguageFromText_UsesTokenBoundaries(t *testing.T) {
	withLanguage := inferLanguageFromText("get nextjs docs")
	if withLanguage != "nextjs" {
		t.Fatalf("inferLanguageFromText matched %q, want %q", withLanguage, "nextjs")
	}

	withoutLanguage := inferLanguageFromText("read docs for architecture")
	if withoutLanguage != "" {
		t.Fatalf("inferLanguageFromText should not infer language from plain docs text, got %q", withoutLanguage)
	}
}
// TestClassifyIntent_GetRouteKeywordFallback verifies that "get nextjs docs"
// is routed to get with language nextjs and that the keyword falls back to
// "overview".
func TestClassifyIntent_GetRouteKeywordFallback(t *testing.T) {
	got, err := classifyIntent("get nextjs docs", "")
	if err != nil {
		t.Fatalf("classifyIntent returned error: %v", err)
	}

	if route := got.Route; route != "get" {
		t.Fatalf("expected get route, got %q", route)
	}

	argv := got.Command
	if len(argv) != 3 {
		t.Fatalf("expected 3 command args, got %v", argv)
	}
	if lang := argv[1]; lang != "nextjs" {
		t.Fatalf("expected language nextjs, got %q", lang)
	}
	if keyword := argv[2]; keyword != "overview" {
		t.Fatalf("expected keyword overview, got %q", keyword)
	}
}
+133 -45
View File
@@ -14,6 +14,7 @@ import argparse
class ModernBannerGenerator:
def __init__(self, data):
self.data = data
self.fonts = self._init_fonts()
# Devour brand colors - consistent with Go theme
self.colors = {
@@ -56,6 +57,49 @@ class ModernBannerGenerator:
'severity_t3': (251, 146, 60), # #fb923c - bright orange
'severity_t4': (248, 113, 113), # #f87171 - bright red
}
def _init_fonts(self):
"""Build the font lookup state used by get_font.

Returns a dict with two keys: "candidates", mapping a weight name
("regular" or "bold") to an ordered list of font file names/paths to
try, and "cache", an initially empty dict.
"""
# Prefer widely-available fonts on Linux/macOS/Windows.
# Within each list the entries are tried in order; the first that loads is used.
font_candidates = {
"regular": [
"arial.ttf",
"/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
"/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
"/System/Library/Fonts/Supplemental/Arial.ttf",
"/Library/Fonts/Arial.ttf",
],
"bold": [
"arialbd.ttf",
"/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
"/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
"/System/Library/Fonts/Supplemental/Arial Bold.ttf",
"/Library/Fonts/Arial Bold.ttf",
],
}
return {
"candidates": font_candidates,
"cache": {},
}
def get_font(self, size, weight="regular"):
    """Return a font for the given size and weight, loading it at most once.

    Args:
        size: Font size passed to ImageFont.truetype.
        weight: Key into the candidate table ("regular" or "bold"). An
            unknown weight has no candidates and yields the default font.

    Returns:
        The first candidate font that loads, or ImageFont.load_default()
        when none does. The result is cached per (size, weight), so the
        fallback is remembered as well.
    """
    key = (size, weight)
    cache = self.fonts["cache"]
    if key in cache:
        return cache[key]

    font = None
    for path in self.fonts["candidates"].get(weight, []):
        try:
            font = ImageFont.truetype(path, size)
            break
        except (OSError, ValueError):
            # Missing/unreadable font file or rejected size: try the next
            # candidate. Anything else (e.g. KeyboardInterrupt) propagates.
            continue

    if font is None:
        font = ImageFont.load_default()

    cache[key] = font
    return font
def get_score_color(self, score, muted=False):
if score >= 90:
@@ -89,6 +133,22 @@ class ModernBannerGenerator:
for x in range(width):
img.putpixel((x, y), (r, g, b))
# Add subtle radial glows for depth
self.draw_glow(img, width * 0.15, height * 0.2, 220, (71, 85, 105), 40)
self.draw_glow(img, width * 0.85, height * 0.75, 260, (251, 146, 60), 35)
def draw_glow(self, img, cx, cy, radius, color, max_alpha):
"""Draw a soft radial glow.

Draws 12 concentric filled ellipses centred on (cx, cy) onto img. The
first has the full radius and an alpha of max_alpha; each later one is
smaller and has a lower alpha. color is an (r, g, b) tuple to which the
alpha is appended.
"""
# NOTE(review): whether the alpha component blends with existing pixels
# depends on img's mode and the ImageDraw mode - confirm against the caller.
draw = ImageDraw.Draw(img)
steps = 12
for i in range(steps):
# Radius shrinks linearly from radius (i = 0) to radius / steps (last step).
r = radius - (radius * i / steps)
# Alpha falls linearly from max_alpha, truncated to an int.
alpha = int(max_alpha * (1 - i / steps))
draw.ellipse(
[(cx - r, cy - r), (cx + r, cy + r)],
fill=(*color, alpha),
)
def draw_glass_card(self, draw, x, y, width, height, border_radius=12, use_alt=False):
"""Draw glass morphism card with enhanced effects"""
@@ -125,9 +185,9 @@ class ModernBannerGenerator:
def draw_score_circle(self, draw, cx, cy, radius, score, label="OVERALL", is_primary=True):
"""Draw enhanced circular score visualization"""
# Background circle with subtle border
draw.ellipse([(cx-radius-2, cy-radius-2), (cx+radius+2, cy+radius+2)],
draw.ellipse([(cx-radius-2, cy-radius-2), (cx+radius+2, cy+radius+2)],
fill=(*self.colors['border'], 100))
draw.ellipse([(cx-radius, cy-radius), (cx+radius, cy+radius)],
draw.ellipse([(cx-radius, cy-radius), (cx+radius, cy+radius)],
fill=self.colors['card'], outline=self.colors['border'])
# Progress arc with enhanced styling
@@ -136,25 +196,26 @@ class ModernBannerGenerator:
percentage = score / 100.0
# Draw background arc
draw.arc([(cx-radius+4, cy-radius+4), (cx+radius-4, cy+radius-4)],
draw.arc([(cx-radius+4, cy-radius+4), (cx+radius-4, cy+radius-4)],
-90, 270, fill=self.colors['border_subtle'], width=6)
# Draw progress arc
start_angle = -90
end_angle = start_angle + (360 * percentage)
arc_width = 8 if is_primary else 6
arc_width = 9 if is_primary else 6
draw.arc([(cx-radius+4, cy-radius+4), (cx+radius-4, cy+radius-4)],
draw.arc([(cx-radius+4, cy-radius+4), (cx+radius-4, cy+radius-4)],
start_angle, end_angle,
fill=score_color, width=arc_width)
# Inner glow ring
if is_primary:
draw.arc([(cx-radius+10, cy-radius+10), (cx+radius-10, cy+radius-10)],
start_angle, end_angle, fill=score_color, width=2)
# Enhanced typography
try:
font_large = ImageFont.truetype("arial.ttf", 32 if is_primary else 28)
font_small = ImageFont.truetype("arial.ttf", 11)
except:
font_large = ImageFont.load_default()
font_small = ImageFont.load_default()
font_large = self.get_font(34 if is_primary else 28, weight="bold")
font_small = self.get_font(11, weight="regular")
# Score text
score_text = f"{int(score)}%"
@@ -163,14 +224,14 @@ class ModernBannerGenerator:
text_height = bbox[3] - bbox[1]
text_color = self.colors['text'] if is_primary else self.colors['text_muted']
draw.text((cx - text_width//2, cy - text_height//2 - 2), score_text,
draw.text((cx - text_width//2, cy - text_height//2 - 2), score_text,
fill=text_color, font=font_large)
# Label
label_bbox = draw.textbbox((0, 0), label, font=font_small)
label_width = label_bbox[2] - label_bbox[0]
draw.text((cx - label_width//2, cy + radius + 15), label,
draw.text((cx - label_width//2, cy + radius + 15), label,
fill=self.colors['text_dim'], font=font_small)
def draw_grade_badge(self, draw, x, y, grade):
@@ -185,14 +246,11 @@ class ModernBannerGenerator:
6, fill=(0, 0, 0, 60))
# Main badge
draw.rounded_rectangle([(x, y), (x + badge_width, y + badge_height)],
draw.rounded_rectangle([(x, y), (x + badge_width, y + badge_height)],
6, fill=grade_color, outline=self.colors['border'])
# Grade text with better typography
try:
font = ImageFont.truetype("arial.ttf", 18)
except:
font = ImageFont.load_default()
font = self.get_font(18, weight="bold")
bbox = draw.textbbox((0, 0), grade, font=font)
text_width = bbox[2] - bbox[0]
@@ -201,15 +259,14 @@ class ModernBannerGenerator:
draw.text((x + badge_width//2 - text_width//2, y + badge_height//2 - text_height//2 + 1),
grade, fill=(255, 255, 255), font=font)
def draw_text(self, draw, text, x, y, size=14, color=None, centered=False):
def draw_text(self, draw, text, x, y, size=14, color=None, centered=False, max_width=None, min_size=9, weight="regular"):
"""Draw enhanced text with better typography"""
if color is None:
color = self.colors['text']
try:
font = ImageFont.truetype("arial.ttf", size)
except:
font = ImageFont.load_default()
font = self.get_font(size, weight=weight)
if max_width is not None:
font = self.fit_font(draw, text, font, max_width, min_size=min_size, weight=weight)
if centered:
bbox = draw.textbbox((0, 0), text, font=font)
@@ -217,16 +274,43 @@ class ModernBannerGenerator:
x = x - text_width // 2
draw.text((x, y), text, fill=color, font=font)
def fit_font(self, draw, text, font, max_width, min_size=9, weight="regular"):
"""Shrink font until text fits max width.

Starting from font, repeatedly measures text with draw.textbbox and, if
it is wider than max_width, retries with a font one size smaller
obtained from get_font(size, weight). Returns the first font that fits,
or the font reached at min_size (which is returned without being
measured again).
"""
# NOTE(review): this compares against a freshly loaded default font object;
# confirm the equality check actually detects the default-font fallback.
if font == ImageFont.load_default():
return font
# Fonts without a size attribute start at min_size, so the loop below is skipped.
size = font.size if hasattr(font, "size") else min_size
current = font
while size > min_size:
bbox = draw.textbbox((0, 0), text, font=current)
# Width is measured as right edge minus left edge of the bounding box.
if (bbox[2] - bbox[0]) <= max_width:
return current
size -= 1
current = self.get_font(size, weight=weight)
return current
def truncate_text(self, draw, text, font, max_width):
"""Truncate text with ellipsis to fit width.

Returns "" when max_width is not positive, text unchanged when it
already fits, otherwise the longest prefix of text followed by "..."
that fits. If no prefix fits, returns "..." alone.
"""
if max_width <= 0:
return ""
# Fit is judged by the bounding box's right edge for text drawn at (0, 0).
if draw.textbbox((0, 0), text, font=font)[2] <= max_width:
return text
ellipsis = "..."
# Try the full text plus ellipsis first, then drop one character at a time.
for i in range(len(text), 0, -1):
candidate = text[:i] + ellipsis
if draw.textbbox((0, 0), candidate, font=font)[2] <= max_width:
return candidate
return ellipsis
def draw_metric_card(self, draw, x, y, width, height, title, value, color):
"""Draw metric card"""
self.draw_glass_card(draw, x, y, width, height)
# Title
self.draw_text(draw, title, x + 15, y + 15, size=12, color=self.colors['text_muted'])
self.draw_text(draw, title, x + 15, y + 14, size=12, color=self.colors['text_muted'])
# Value
self.draw_text(draw, value, x + 15, y + 40, size=20, color=color)
self.draw_text(draw, value, x + 15, y + 38, size=20, color=color, weight="bold")
def draw_severity_bars(self, draw, x, y, width, height, find_by_tier):
"""Draw enhanced severity bars"""
@@ -313,18 +397,18 @@ class ModernBannerGenerator:
# Enhanced header section
header_y = content_y + 20
self.draw_text(draw, "DEVOUR SCORE", content_x + content_width//2, header_y,
size=20, color=self.colors['text'], centered=True)
size=20, color=self.colors['text'], centered=True, weight="bold")
# Project info
project_name = self.data['project_name']
version_text = f"v{self.data['version']}" if self.data['version'] else "latest"
project_text = f"{project_name} {version_text}"
self.draw_text(draw, project_text, content_x + content_width//2, header_y + 25,
size=14, color=self.colors['text_muted'], centered=True)
size=14, color=self.colors['text_muted'], centered=True, max_width=content_width - 120)
# Timestamp
time_text = self.data.get('timestamp', 'Today')
self.draw_text(draw, time_text, content_x + content_width//2,
self.draw_text(draw, time_text, content_x + content_width//2,
content_y + content_height - 25,
size=11, color=self.colors['text_dim'], centered=True)
@@ -347,19 +431,19 @@ class ModernBannerGenerator:
# Total findings
self.draw_text(draw, str(findings_total), col_x + col_width//2, metrics_y,
size=18, color=self.colors['text'], centered=True)
size=18, color=self.colors['text'], centered=True, weight="bold")
self.draw_text(draw, "TOTAL", col_x + col_width//2, metrics_y + 22,
size=10, color=self.colors['text_muted'], centered=True)
# Open findings
self.draw_text(draw, str(findings_open), col_x + col_width + col_width//2, metrics_y,
size=18, color=self.colors['orange'], centered=True)
size=18, color=self.colors['orange'], centered=True, weight="bold")
self.draw_text(draw, "OPEN", col_x + col_width + col_width//2, metrics_y + 22,
size=10, color=self.colors['text_muted'], centered=True)
# Resolved findings
self.draw_text(draw, str(findings_closed), col_x + 2*col_width + col_width//2, metrics_y,
size=18, color=self.colors['score_a'], centered=True)
size=18, color=self.colors['score_a'], centered=True, weight="bold")
self.draw_text(draw, "RESOLVED", col_x + 2*col_width + col_width//2, metrics_y + 22,
size=10, color=self.colors['text_muted'], centered=True)
@@ -379,7 +463,7 @@ class ModernBannerGenerator:
# Header section
header_y = 30
self.draw_text(draw, f"{self.data['project_name']} Quality Report",
width//2, header_y, size=28, color=self.colors['text'], centered=True)
width//2, header_y, size=28, color=self.colors['text'], centered=True, weight="bold", max_width=width - 80)
version_text = f"v{self.data['version']}" if self.data['version'] else "latest"
self.draw_text(draw, version_text, width//2, header_y + 35,
@@ -399,8 +483,8 @@ class ModernBannerGenerator:
# Score details
score_details_y = score_y + 100
self.draw_text(draw, f"Overall: {int(self.data['overall_score'])}%",
score_x, score_details_y, size=20,
color=self.get_score_color(self.data['overall_score']), centered=True)
score_x, score_details_y, size=20,
color=self.get_score_color(self.data['overall_score']), centered=True, weight="bold")
self.draw_text(draw, f"Strict: {int(self.data['strict_score'])}%",
score_x, score_details_y + 25, size=16,
color=self.get_score_color(self.data['strict_score'], muted=True), centered=True)
@@ -419,7 +503,7 @@ class ModernBannerGenerator:
# Column 1 Header
self.draw_text(draw, "Score Breakdown", col1_x + col_width//2, grid_start_y + 20,
size=18, color=self.colors['text'], centered=True)
size=18, color=self.colors['text'], centered=True, weight="bold")
# Column 1 Data
score_data = [
@@ -439,7 +523,7 @@ class ModernBannerGenerator:
# Value
self.draw_text(draw, value, col1_x + col_width//2, data_y + 35,
size=24, color=color, centered=True)
size=24, color=color, centered=True, weight="bold")
data_y += 80
@@ -449,15 +533,19 @@ class ModernBannerGenerator:
# Column 2 Header
self.draw_text(draw, "Findings by Type", col2_x + col_width//2, grid_start_y + 20,
size=18, color=self.colors['text'], centered=True)
size=18, color=self.colors['text'], centered=True, weight="bold")
# Column 2 Data - Top finding types
type_data_y = grid_start_y + 60
type_items = list(self.data['find_by_type'].items())[:6] # Top 6 types
max_type_count = max(self.data['find_by_type'].values()) if self.data['find_by_type'] else 1
if not type_items:
self.draw_text(draw, "No findings", col2_x + col_width//2, grid_start_y + 110,
size=14, color=self.colors['text_dim'], centered=True)
for issue_type, count in type_items:
# Type bar
bar_width = int((col_width - 40) * (count / max(self.data['find_by_type'].values())))
bar_width = int((col_width - 40) * (count / max_type_count))
bar_height = 22
# Bar background
@@ -469,9 +557,9 @@ class ModernBannerGenerator:
4, fill=self.colors['orange'])
# Type label
label_text = f"{issue_type}"
if len(label_text) > 20:
label_text = label_text[:17] + "..."
label_text = f"{issue_type}".replace("_", " ")
font_label = self.get_font(11, weight="regular")
label_text = self.truncate_text(draw, label_text, font_label, col_width - 90)
self.draw_text(draw, label_text, col2_x + 25, type_data_y + 2,
size=11, color=self.colors['text_muted'])
@@ -487,7 +575,7 @@ class ModernBannerGenerator:
# Column 3 Header
self.draw_text(draw, "Issues by Severity", col3_x + col_width//2, grid_start_y + 20,
size=18, color=self.colors['text'], centered=True)
size=18, color=self.colors['text'], centered=True, weight="bold")
# Column 3 Data - Severity breakdown
severity_data_y = grid_start_y + 60
@@ -510,11 +598,11 @@ class ModernBannerGenerator:
# Severity name
self.draw_text(draw, severity_name, col3_x + 50, severity_data_y + 15,
size=14, color=self.colors['text'])
size=14, color=self.colors['text'], max_width=col_width - 70)
# Count
self.draw_text(draw, f"{count} issues", col3_x + 50, severity_data_y + 35,
size=16, color=color)
size=16, color=color, weight="bold")
severity_data_y += 70
@@ -539,7 +627,7 @@ class ModernBannerGenerator:
# Value
self.draw_text(draw, value, metric_x + metrics_width//2, summary_y + 10,
size=18, color=color, centered=True)
size=18, color=color, centered=True, weight="bold")
# Label
self.draw_text(draw, label, metric_x + metrics_width//2, summary_y + 30,
-1
View File
@@ -24,7 +24,6 @@ This command will:
}
func init() {
rootCmd.AddCommand(demoCmd)
}
func runDemo(cmd *cobra.Command, args []string) error {
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
+5
View File
@@ -0,0 +1,5 @@
{
"version": "1",
"built_at": "2026-02-23T11:19:21.65415175+01:00",
"docs": []
}
@@ -0,0 +1,7 @@
{
"version": "1",
"built_at": "2026-02-23T11:19:21.65415175+01:00",
"docs_dir": "./devour_data/docs",
"source_file_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
"doc_count": 0
}
+9 -8
View File
@@ -1,3 +1,5 @@
//go:build ignore
package main
import (
@@ -5,7 +7,6 @@ import (
"time"
"github.com/yourorg/devour/internal/quality"
"github.com/yourorg/devour/internal/quality/scorecard"
)
func main() {
@@ -80,14 +81,14 @@ func main() {
},
LastScan: time.Now(),
Scorecard: &quality.Scorecard{
TotalScore: 72,
StrictScore: 68,
TotalScore: 72,
StrictScore: 68,
FindingsByType: map[string]int{
"complexity": 1,
"naming": 1,
"duplication": 1,
"security": 1,
"unused_import": 1,
"complexity": 1,
"naming": 1,
"duplication": 1,
"security": 1,
"unused_import": 1,
},
FindingsByTier: map[quality.Severity]int{
quality.SeverityT1: 1,
+226 -58
View File
@@ -2,6 +2,7 @@ package cmd
import (
"fmt"
"sort"
"strings"
"github.com/spf13/cobra"
@@ -11,112 +12,210 @@ var getCmd = &cobra.Command{
Use: "get <language> <keyword>",
Short: "Get documentation for a language/framework",
Long: `Quickly fetch documentation for popular languages and frameworks.
This command automatically maps language+keyword combinations to their official documentation sites.
Supported languages:
go, golang - Go documentation (pkg.go.dev)
rust - Rust documentation (docs.rs)
python, py - Python documentation (docs.python.org)
java - Java documentation (docs.oracle.com)
spring - Spring Boot documentation (docs.spring.io)
typescript, ts - TypeScript documentation (typescriptlang.org)
react - React documentation (react.dev)
vue - Vue.js documentation (vuejs.org)
nuxt - Nuxt documentation (nuxt.com)
docker - Docker documentation (docs.docker.com)
cloudflare, cf - Cloudflare documentation (developers.cloudflare.com)
astro - Astro documentation (docs.astro.build)
This command maps language+keyword combinations to official documentation sources.
Examples:
devour get go http # Go HTTP package documentation
devour get python asyncio # Python asyncio module
devour get react hooks # React Hooks documentation
devour get docker compose # Docker Compose docs
devour get rust tokio # Rust Tokio crate`,
devour get go http
devour get python asyncio
devour get react hooks
devour get nextjs routing
devour get express middleware`,
Args: cobra.ExactArgs(2),
RunE: runGet,
}
func init() {
// Add flags that can override defaults
getCmd.Flags().StringVarP(&scrapeFormat, "format", "f", "json", "output format (json, markdown)")
getCmd.Flags().StringVarP(&scrapeOutput, "output", "o", "", "output directory (default: devour_data/docs)")
getCmd.Flags().StringVarP(&scrapeOutput, "output", "o", "", "output directory (default: configured docs dir)")
getCmd.Flags().IntVar(&scrapeConcurrency, "concurrency", 10, "parallel scraping workers")
}
func runGet(cmd *cobra.Command, args []string) error {
language := strings.ToLower(args[0])
keyword := strings.ToLower(args[1])
langIn := strings.ToLower(strings.TrimSpace(args[0]))
keyword := strings.TrimSpace(args[1])
if keyword == "" {
return fmt.Errorf("keyword is required")
}
language, ok := normalizeLanguage(langIn)
if !ok {
return fmt.Errorf("unsupported language: %s. Supported: %s", langIn, strings.Join(supportedLanguages(), ", "))
}
// Map language to base URL and construct full URL
url, err := constructDocURL(language, keyword)
if err != nil {
return err
}
// Set the scrape type based on language
sourceType := mapLanguageToType(language)
// Reuse the existing scrape logic with pre-determined values
scrapeType = string(sourceType)
sourceURL := url
scrapeType = sourceType
fmt.Printf("Getting docs for: %s %s\n", language, keyword)
fmt.Printf("URL: %s\n", sourceURL)
fmt.Printf("Type: %s\n", sourceType)
fmt.Println()
fmt.Printf("URL: %s\n", url)
fmt.Printf("Type: %s\n\n", sourceType)
// Call the existing scrape logic
return runScrape(cmd, []string{sourceURL})
return runScrape(cmd, []string{url})
}
func constructDocURL(language, keyword string) (string, error) {
language = strings.ToLower(strings.TrimSpace(language))
keyword = strings.TrimSpace(keyword)
lowerKeyword := strings.ToLower(keyword)
switch language {
case "go", "golang":
return fmt.Sprintf("https://pkg.go.dev/%s", keyword), nil
case "go":
return fmt.Sprintf("https://pkg.go.dev/%s", lowerKeyword), nil
case "rust":
return fmt.Sprintf("https://docs.rs/%s/latest/%s/", keyword, keyword), nil
case "python", "py":
if keyword == "stdlib" || keyword == "standard" {
return fmt.Sprintf("https://docs.rs/%s/latest/%s/", lowerKeyword, lowerKeyword), nil
case "python":
if lowerKeyword == "stdlib" || lowerKeyword == "standard" {
return "https://docs.python.org/3/library/", nil
}
return fmt.Sprintf("https://docs.python.org/3/library/%s.html", keyword), nil
return fmt.Sprintf("https://docs.python.org/3/library/%s.html", lowerKeyword), nil
case "java":
return fmt.Sprintf("https://docs.oracle.com/javase/8/docs/api/%s.html", keyword), nil
return fmt.Sprintf("https://docs.oracle.com/javase/8/docs/api/%s.html", lowerKeyword), nil
case "spring":
return fmt.Sprintf("https://docs.spring.io/spring-boot/docs/current/reference/htmlsingle/#%s", keyword), nil
case "typescript", "ts":
return fmt.Sprintf("https://www.typescriptlang.org/docs/handbook/%s.html", keyword), nil
if lowerKeyword == "mcp" || lowerKeyword == "mcp-overview" {
return "https://docs.spring.io/spring-ai/reference/api/mcp/mcp-overview.html", nil
}
return fmt.Sprintf("https://docs.spring.io/spring-boot/docs/current/reference/htmlsingle/#%s", lowerKeyword), nil
case "typescript":
return fmt.Sprintf("https://www.typescriptlang.org/docs/handbook/%s.html", lowerKeyword), nil
case "react":
return fmt.Sprintf("https://react.dev/reference/react/%s", keyword), nil
if lowerKeyword == "hooks" {
return "https://react.dev/reference/react", nil
}
return fmt.Sprintf("https://react.dev/reference/react/%s", lowerKeyword), nil
case "vue":
return fmt.Sprintf("https://vuejs.org/guide/%s.html", keyword), nil
if strings.Contains(lowerKeyword, "api") {
return "https://vuejs.org/api/", nil
}
return fmt.Sprintf("https://vuejs.org/guide/%s.html", lowerKeyword), nil
case "nuxt":
return fmt.Sprintf("https://nuxt.com/docs/guide/%s", keyword), nil
return fmt.Sprintf("https://nuxt.com/docs/guide/%s", lowerKeyword), nil
case "docker":
return fmt.Sprintf("https://docs.docker.com/%s", keyword), nil
case "cloudflare", "cf":
return fmt.Sprintf("https://developers.cloudflare.com/%s", keyword), nil
return fmt.Sprintf("https://docs.docker.com/%s", lowerKeyword), nil
case "cloudflare":
return fmt.Sprintf("https://developers.cloudflare.com/%s", lowerKeyword), nil
case "astro":
return fmt.Sprintf("https://docs.astro.build/en/guides/%s", keyword), nil
path := lowerKeyword
switch lowerKeyword {
case "components":
path = "basics/astro-components"
case "api":
path = "reference/api-reference"
case "install", "setup", "getting-started":
path = "install-and-setup"
default:
if !strings.Contains(lowerKeyword, "/") {
path = "guides/" + lowerKeyword
}
}
return fmt.Sprintf("https://docs.astro.build/en/%s/", path), nil
case "csharp":
lowerKeyword = strings.TrimPrefix(lowerKeyword, "/")
if strings.Contains(lowerKeyword, "regex") || strings.Contains(lowerKeyword, "regular-expression") {
return "https://learn.microsoft.com/en-us/dotnet/standard/base-types/regular-expressions", nil
}
return fmt.Sprintf("https://learn.microsoft.com/en-us/dotnet/csharp/language-reference/%s", lowerKeyword), nil
case "kotlin":
lowerKeyword = strings.TrimPrefix(lowerKeyword, "/")
if lowerKeyword == "regex" || lowerKeyword == "regexp" {
lowerKeyword = "strings"
}
if strings.HasSuffix(lowerKeyword, ".html") {
return fmt.Sprintf("https://kotlinlang.org/docs/%s", lowerKeyword), nil
}
return fmt.Sprintf("https://kotlinlang.org/docs/%s.html", lowerKeyword), nil
case "php":
lowerKeyword = strings.TrimPrefix(lowerKeyword, "/")
if strings.HasSuffix(lowerKeyword, ".php") || strings.Contains(lowerKeyword, "function.") || strings.Contains(lowerKeyword, "book.") {
return fmt.Sprintf("https://www.php.net/manual/en/%s", lowerKeyword), nil
}
return fmt.Sprintf("https://www.php.net/manual/en/book.%s.php", lowerKeyword), nil
case "ruby":
keyword = strings.TrimPrefix(keyword, "/")
switch strings.ToLower(keyword) {
case "regex", "regexp":
keyword = "Regexp"
case "string":
keyword = "String"
case "array":
keyword = "Array"
default:
if !strings.Contains(keyword, "::") && len(keyword) > 0 {
keyword = strings.ToUpper(keyword[:1]) + strings.ToLower(keyword[1:])
}
}
return fmt.Sprintf("https://ruby-doc.org/core/%s.html", keyword), nil
case "elixir":
keyword = strings.TrimPrefix(keyword, "/")
switch strings.ToLower(keyword) {
case "regex":
keyword = "Regex"
case "string":
keyword = "String"
case "enum":
keyword = "Enum"
default:
if len(keyword) > 0 {
keyword = strings.ToUpper(keyword[:1]) + strings.ToLower(keyword[1:])
}
}
return fmt.Sprintf("https://hexdocs.pm/elixir/%s.html", keyword), nil
case "nextjs":
if strings.Contains(lowerKeyword, "routing") {
return "https://nextjs.org/docs/app/building-your-application/routing", nil
}
if strings.Contains(lowerKeyword, "data") || strings.Contains(lowerKeyword, "fetch") {
return "https://nextjs.org/docs/app/building-your-application/data-fetching", nil
}
return "https://nextjs.org/docs", nil
case "svelte":
if strings.Contains(lowerKeyword, "kit") {
return "https://svelte.dev/docs/kit", nil
}
return "https://svelte.dev/docs/svelte/overview", nil
case "angular":
if strings.Contains(lowerKeyword, "http") {
return "https://angular.dev/guide/http", nil
}
return "https://angular.dev/guide/components", nil
case "remix":
if strings.Contains(lowerKeyword, "route") {
return "https://v2.remix.run/docs/file-conventions/routes", nil
}
return "https://v2.remix.run/docs", nil
case "solid":
// Solid docs are published from this repository and include solid-router content.
return "https://github.com/solidjs/solid-docs", nil
case "express":
if strings.Contains(lowerKeyword, "routing") {
return "https://expressjs.com/en/guide/routing.html", nil
}
if strings.Contains(lowerKeyword, "middleware") {
return "https://expressjs.com/en/guide/using-middleware.html", nil
}
return "https://expressjs.com/en/guide/writing-middleware.html", nil
default:
return "", fmt.Errorf("unsupported language: %s. Supported languages: go, rust, python, java, spring, typescript, react, vue, nuxt, docker, cloudflare, astro", language)
return "", fmt.Errorf("unsupported language: %s. Supported: %s", language, strings.Join(supportedLanguages(), ", "))
}
}
func mapLanguageToType(language string) string {
language, _ = normalizeLanguage(language)
switch language {
case "go", "golang":
case "go":
return "godocs"
case "rust":
return "rustdocs"
case "python", "py":
case "python":
return "pythondocs"
case "java":
return "javadocs"
case "spring":
return "springdocs"
case "typescript", "ts":
case "typescript":
return "tsdocs"
case "react":
return "reactdocs"
@@ -126,11 +225,80 @@ func mapLanguageToType(language string) string {
return "nuxtdocs"
case "docker":
return "dockerdocs"
case "cloudflare", "cf":
case "cloudflare":
return "cloudflaredocs"
case "astro":
return "astrodocs"
case "csharp", "kotlin", "php", "ruby", "elixir", "nextjs", "svelte", "angular", "remix", "express":
return "url"
case "solid":
return "github"
default:
return "web"
return ""
}
}
// normalizeLanguage resolves a user-supplied language name or alias to its
// canonical identifier. Matching ignores letter case and surrounding
// whitespace. The boolean result is false, and the name empty, when the input
// is blank or is not a known alias.
func normalizeLanguage(language string) (string, bool) {
	key := strings.TrimSpace(language)
	key = strings.ToLower(key)
	if key == "" {
		return "", false
	}

	name, known := languageAliases()[key]
	if !known {
		return "", false
	}
	return name, true
}
// languageAliases returns a newly built lookup table that maps every accepted
// language spelling to its canonical name. Each canonical name maps to itself,
// and each alternative spelling maps to the canonical name of its group.
func languageAliases() map[string]string {
	groups := []struct {
		canonical string
		alternate []string
	}{
		{canonical: "go", alternate: []string{"golang"}},
		{canonical: "rust"},
		{canonical: "python", alternate: []string{"py"}},
		{canonical: "java"},
		{canonical: "spring"},
		{canonical: "typescript", alternate: []string{"ts"}},
		{canonical: "react"},
		{canonical: "vue"},
		{canonical: "nuxt"},
		{canonical: "docker"},
		{canonical: "cloudflare", alternate: []string{"cf"}},
		{canonical: "astro"},
		{canonical: "csharp", alternate: []string{"cs"}},
		{canonical: "kotlin", alternate: []string{"kt"}},
		{canonical: "php"},
		{canonical: "ruby", alternate: []string{"rb"}},
		{canonical: "elixir", alternate: []string{"ex"}},
		{canonical: "nextjs", alternate: []string{"next"}},
		{canonical: "svelte"},
		{canonical: "angular", alternate: []string{"ng"}},
		{canonical: "remix"},
		{canonical: "solid", alternate: []string{"solidjs"}},
		{canonical: "express", alternate: []string{"expressjs"}},
	}

	table := make(map[string]string, 2*len(groups))
	for _, g := range groups {
		table[g.canonical] = g.canonical
		for _, alt := range g.alternate {
			table[alt] = g.canonical
		}
	}
	return table
}
// supportedLanguages returns the distinct canonical language names found in
// languageAliases, sorted in ascending order.
func supportedLanguages() []string {
	unique := make(map[string]struct{})
	for _, name := range languageAliases() {
		unique[name] = struct{}{}
	}

	names := make([]string, 0, len(unique))
	for name := range unique {
		names = append(names, name)
	}
	// Map iteration order is random, so sort for a stable result.
	sort.Strings(names)
	return names
}
+121
View File
@@ -0,0 +1,121 @@
package cmd
import "testing"
func TestConstructDocURL_SupportedLanguages(t *testing.T) {
tests := []struct {
language string
keyword string
wantURL string
}{
{"go", "net/http", "https://pkg.go.dev/net/http"},
{"rust", "tokio", "https://docs.rs/tokio/latest/tokio/"},
{"python", "asyncio", "https://docs.python.org/3/library/asyncio.html"},
{"java", "java/util/list", "https://docs.oracle.com/javase/8/docs/api/java/util/list.html"},
{"spring", "mcp", "https://docs.spring.io/spring-ai/reference/api/mcp/mcp-overview.html"},
{"typescript", "utility-types", "https://www.typescriptlang.org/docs/handbook/utility-types.html"},
{"react", "hooks", "https://react.dev/reference/react"},
{"vue", "essentials/reactivity-fundamentals", "https://vuejs.org/guide/essentials/reactivity-fundamentals.html"},
{"nuxt", "directory-structure", "https://nuxt.com/docs/guide/directory-structure"},
{"docker", "compose", "https://docs.docker.com/compose"},
{"cloudflare", "workers", "https://developers.cloudflare.com/workers"},
{"astro", "components", "https://docs.astro.build/en/basics/astro-components/"},
{"csharp", "regex", "https://learn.microsoft.com/en-us/dotnet/standard/base-types/regular-expressions"},
{"kotlin", "regex", "https://kotlinlang.org/docs/strings.html"},
{"php", "pcre", "https://www.php.net/manual/en/book.pcre.php"},
{"ruby", "Regexp", "https://ruby-doc.org/core/Regexp.html"},
{"elixir", "String", "https://hexdocs.pm/elixir/String.html"},
{"nextjs", "routing", "https://nextjs.org/docs/app/building-your-application/routing"},
{"svelte", "kit", "https://svelte.dev/docs/kit"},
{"angular", "http", "https://angular.dev/guide/http"},
{"remix", "routes", "https://v2.remix.run/docs/file-conventions/routes"},
{"solid", "signals", "https://github.com/solidjs/solid-docs"},
{"express", "routing", "https://expressjs.com/en/guide/routing.html"},
}
for _, tt := range tests {
t.Run(tt.language+"_"+tt.keyword, func(t *testing.T) {
got, err := constructDocURL(tt.language, tt.keyword)
if err != nil {
t.Fatalf("constructDocURL(%q, %q) returned error: %v", tt.language, tt.keyword, err)
}
if got != tt.wantURL {
t.Fatalf("constructDocURL(%q, %q) = %q, want %q", tt.language, tt.keyword, got, tt.wantURL)
}
})
}
}
// TestConstructDocURL_UnsupportedLanguage verifies that a language with no
// mapping yields an error rather than a URL.
func TestConstructDocURL_UnsupportedLanguage(t *testing.T) {
	_, err := constructDocURL("haskell", "regex-tdfa")
	if err != nil {
		return
	}
	t.Fatal("constructDocURL should return an error for unsupported language")
}
// TestMapLanguageToType checks the source type returned for each canonical
// language name and alias, and that an unknown language maps to the empty
// string.
func TestMapLanguageToType(t *testing.T) {
	cases := []struct {
		lang string
		want string
	}{
		{"go", "godocs"},
		{"golang", "godocs"},
		{"rust", "rustdocs"},
		{"python", "pythondocs"},
		{"py", "pythondocs"},
		{"java", "javadocs"},
		{"spring", "springdocs"},
		{"typescript", "tsdocs"},
		{"ts", "tsdocs"},
		{"react", "reactdocs"},
		{"vue", "vuedocs"},
		{"nuxt", "nuxtdocs"},
		{"docker", "dockerdocs"},
		{"cloudflare", "cloudflaredocs"},
		{"cf", "cloudflaredocs"},
		{"astro", "astrodocs"},
		{"csharp", "url"},
		{"kotlin", "url"},
		{"php", "url"},
		{"ruby", "url"},
		{"elixir", "url"},
		{"nextjs", "url"},
		{"next", "url"},
		{"svelte", "url"},
		{"angular", "url"},
		{"ng", "url"},
		{"remix", "url"},
		{"solidjs", "github"},
		{"expressjs", "url"},
		{"unknown", ""},
	}

	for _, tc := range cases {
		t.Run(tc.lang, func(t *testing.T) {
			if actual := mapLanguageToType(tc.lang); actual != tc.want {
				t.Fatalf("mapLanguageToType(%q) = %q, want %q", tc.lang, actual, tc.want)
			}
		})
	}
}
// TestNormalizeLanguage checks alias resolution for a few known spellings and
// that an unknown name is reported as unsupported.
func TestNormalizeLanguage(t *testing.T) {
	cases := []struct {
		input     string
		canonical string
		known     bool
	}{
		{"go", "go", true},
		{"golang", "go", true},
		{"next", "nextjs", true},
		{"solidjs", "solid", true},
		{"expressjs", "express", true},
		{"unknown", "", false},
	}

	for i := range cases {
		tc := cases[i]
		name, found := normalizeLanguage(tc.input)
		if name == tc.canonical && found == tc.known {
			continue
		}
		t.Fatalf("normalizeLanguage(%q) = (%q,%v), want (%q,%v)", tc.input, name, found, tc.canonical, tc.known)
	}
}
+5 -61
View File
@@ -6,6 +6,7 @@ import (
"path/filepath"
"github.com/spf13/cobra"
appconfig "github.com/yourorg/devour/internal/config"
)
var initCmd = &cobra.Command{
@@ -53,7 +54,10 @@ func runInit(cmd *cobra.Command, args []string) error {
}
// Create default config
config := generateDefaultConfig(initRemote)
config, err := appconfig.RenderInitYAML(initRemote)
if err != nil {
return fmt.Errorf("failed to render default config: %w", err)
}
if err := os.WriteFile(configPath, []byte(config), 0644); err != nil {
return fmt.Errorf("failed to write config: %w", err)
}
@@ -82,63 +86,3 @@ func runInit(cmd *cobra.Command, args []string) error {
return nil
}
// generateDefaultConfig returns the default Devour configuration as YAML text.
// The only variable part of the template is the server mode, which is rendered
// as "remote" when remote is true and as "local" otherwise.
func generateDefaultConfig(remote bool) string {
mode := "local"
if remote {
mode = "remote"
}
// The single %s verb in the template below is the server mode.
return fmt.Sprintf(`# Devour Configuration
version: 1
# Storage paths
storage:
docs_dir: ./devour_data/docs
index_dir: ./devour_data/index
metadata_dir: ./devour_data/metadata
# Embedding settings
embeddings:
provider: openai
model: text-embedding-3-small
dimensions: 1536
api_key: ${OPENAI_API_KEY}
batch_size: 100
# Vector database
vector_db:
type: chromem
persist: true
similarity_metric: cosine
# Scraping settings
scraper:
user_agent: "Devour/1.0"
timeout: 30s
retry_count: 3
concurrency: 10
rate_limit: 500ms
max_depth: 3
cache_dir: ./devour_data/cache
# Scheduler
scheduler:
enabled: true
interval: 72h
check_method: hash
# Server settings
server:
mode: %s
port: 8080
host: localhost
# Sources (add your own)
sources: []
# - name: example-docs
# type: url
# url: https://docs.example.com
# include: ["**/*.md", "**/*.html"]
`, mode)
}
+67 -95
View File
@@ -1,118 +1,90 @@
package cmd
import (
"encoding/json"
"fmt"
"io"
"strings"
"github.com/spf13/cobra"
)
var languagesFormat string
var languagesCmd = &cobra.Command{
Use: "languages",
Short: "Show supported languages and their mappings",
Long: `Display all supported languages for the 'devour get' command
along with their base URLs and examples.
This helps you discover what documentation sources are available
and how to reference them quickly.`,
Short: "Show supported languages and aliases",
Long: `Display all supported languages for 'devour get' and 'devour ask'
with aliases and starter examples.`,
RunE: runLanguages,
}
func init() {
rootCmd.AddCommand(languagesCmd)
languagesCmd.Flags().StringVar(&languagesFormat, "format", "text", "output format (text, json)")
}
// languageInfo is one row of the supported-languages listing. It is printed
// as text and, through its JSON tags, serialized for JSON output.
type languageInfo struct {
// Canonical is the canonical language name.
Canonical string `json:"canonical"`
// Aliases lists the accepted spellings for the language.
Aliases []string `json:"aliases"`
// Example is a sample command line for the language.
Example string `json:"example"`
// Source names the documentation site for the language.
Source string `json:"source"`
}
func runLanguages(cmd *cobra.Command, args []string) error {
fmt.Println("🌐 Devour Supported Languages")
fmt.Println("═══════════════════════════════════════════════════════════════")
fmt.Println()
languages := []struct {
langs []string
url string
examples []string
}{
{
langs: []string{"go", "golang"},
url: "https://pkg.go.dev/{package}",
examples: []string{"devour get go http", "devour get go fmt", "devour get golang json"},
},
{
langs: []string{"rust"},
url: "https://docs.rs/{crate}/latest/{crate}/",
examples: []string{"devour get rust tokio", "devour get rust serde", "devour get rust clap"},
},
{
langs: []string{"python", "py"},
url: "https://docs.python.org/3/library/{module}.html",
examples: []string{"devour get python asyncio", "devour get py requests", "devour get python stdlib"},
},
{
langs: []string{"java"},
url: "https://docs.oracle.com/javase/8/docs/api/{package}.html",
examples: []string{"devour get java string", "devour get java arraylist"},
},
{
langs: []string{"spring"},
url: "https://docs.spring.io/spring-boot/docs/current/reference/htmlsingle/#{section}",
examples: []string{"devour get spring boot", "devour get spring testing"},
},
{
langs: []string{"typescript", "ts"},
url: "https://www.typescriptlang.org/docs/handbook/{topic}.html",
examples: []string{"devour get typescript interfaces", "devour get ts decorators"},
},
{
langs: []string{"react"},
url: "https://react.dev/reference/react/{feature}",
examples: []string{"devour get react hooks", "devour get react components", "devour get react state"},
},
{
langs: []string{"vue"},
url: "https://vuejs.org/guide/{topic}.html",
examples: []string{"devour get vue components", "devour get vue reactivity"},
},
{
langs: []string{"nuxt"},
url: "https://nuxt.com/docs/guide/{topic}",
examples: []string{"devour get nuxt routing", "devour get nuxt middleware"},
},
{
langs: []string{"docker"},
url: "https://docs.docker.com/{topic}",
examples: []string{"devour get docker compose", "devour get docker build", "devour get docker networking"},
},
{
langs: []string{"cloudflare", "cf"},
url: "https://developers.cloudflare.com/{topic}",
examples: []string{"devour get cloudflare workers", "devour get cf pages", "devour get cloudflare dns"},
},
{
langs: []string{"astro"},
url: "https://docs.astro.build/en/guides/{topic}",
examples: []string{"devour get astro routing", "devour get astro components"},
},
rows := []languageInfo{
{Canonical: "go", Aliases: []string{"go", "golang"}, Example: "devour get go http", Source: "pkg.go.dev"},
{Canonical: "rust", Aliases: []string{"rust"}, Example: "devour get rust tokio", Source: "docs.rs"},
{Canonical: "python", Aliases: []string{"python", "py"}, Example: "devour get python asyncio", Source: "docs.python.org"},
{Canonical: "java", Aliases: []string{"java"}, Example: "devour get java string", Source: "docs.oracle.com"},
{Canonical: "spring", Aliases: []string{"spring"}, Example: "devour get spring mcp", Source: "docs.spring.io"},
{Canonical: "typescript", Aliases: []string{"typescript", "ts"}, Example: "devour get ts interfaces", Source: "typescriptlang.org"},
{Canonical: "react", Aliases: []string{"react"}, Example: "devour get react hooks", Source: "react.dev"},
{Canonical: "vue", Aliases: []string{"vue"}, Example: "devour get vue reactivity", Source: "vuejs.org"},
{Canonical: "nuxt", Aliases: []string{"nuxt"}, Example: "devour get nuxt routing", Source: "nuxt.com"},
{Canonical: "docker", Aliases: []string{"docker"}, Example: "devour get docker compose", Source: "docs.docker.com"},
{Canonical: "cloudflare", Aliases: []string{"cloudflare", "cf"}, Example: "devour get cloudflare workers", Source: "developers.cloudflare.com"},
{Canonical: "astro", Aliases: []string{"astro"}, Example: "devour get astro components", Source: "docs.astro.build"},
{Canonical: "csharp", Aliases: []string{"csharp", "cs"}, Example: "devour get csharp regex", Source: "learn.microsoft.com"},
{Canonical: "kotlin", Aliases: []string{"kotlin", "kt"}, Example: "devour get kotlin strings", Source: "kotlinlang.org"},
{Canonical: "php", Aliases: []string{"php"}, Example: "devour get php pcre", Source: "php.net"},
{Canonical: "ruby", Aliases: []string{"ruby", "rb"}, Example: "devour get ruby Regexp", Source: "ruby-doc.org"},
{Canonical: "elixir", Aliases: []string{"elixir", "ex"}, Example: "devour get elixir String", Source: "hexdocs.pm"},
{Canonical: "nextjs", Aliases: []string{"next", "nextjs"}, Example: "devour get nextjs routing", Source: "nextjs.org"},
{Canonical: "svelte", Aliases: []string{"svelte"}, Example: "devour get svelte kit", Source: "svelte.dev"},
{Canonical: "angular", Aliases: []string{"angular", "ng"}, Example: "devour get angular http", Source: "angular.dev"},
{Canonical: "remix", Aliases: []string{"remix"}, Example: "devour get remix routes", Source: "v2.remix.run"},
{Canonical: "solid", Aliases: []string{"solid", "solidjs"}, Example: "devour get solid router", Source: "github.com/solidjs/solid-docs"},
{Canonical: "express", Aliases: []string{"express", "expressjs"}, Example: "devour get express middleware", Source: "expressjs.com"},
}
for _, lang := range languages {
fmt.Printf("🔷 %s\n", strings.Join(lang.langs, ", "))
fmt.Printf(" URL: %s\n", lang.url)
fmt.Printf(" Examples:\n")
for _, example := range lang.examples {
fmt.Printf(" • %s\n", example)
switch strings.ToLower(strings.TrimSpace(languagesFormat)) {
case "json":
out := struct {
Count int `json:"count"`
Languages []languageInfo `json:"languages"`
}{
Count: len(rows),
Languages: rows,
}
fmt.Println()
enc := json.NewEncoder(cmd.OutOrStdout())
enc.SetIndent("", " ")
return enc.Encode(out)
case "text", "":
printLanguagesText(cmd.OutOrStdout(), rows)
return nil
default:
return fmt.Errorf("unsupported format: %s", languagesFormat)
}
fmt.Println("💡 Pro Tips:")
fmt.Println(" • Use 'devour get <language> help' for language-specific help")
fmt.Println(" • Add --format markdown for enhanced documentation")
fmt.Println(" • Most languages support common aliases (e.g., py → python)")
fmt.Println()
fmt.Println("🚀 Quick Start:")
fmt.Println(" devour get go http --format markdown")
fmt.Println(" devour get python asyncio")
fmt.Println(" devour get react hooks")
return nil
}
// printLanguagesText writes the plain-text language listing to out: a title
// and rule, one three-line entry per row (name with aliases, source, example),
// and a closing usage tip. Write errors are deliberately ignored.
func printLanguagesText(out io.Writer, rows []languageInfo) {
	_, _ = fmt.Fprintln(out, "Devour Supported Languages")
	_, _ = fmt.Fprintln(out, "============================================")
	_, _ = fmt.Fprintln(out)

	for i := range rows {
		entry := rows[i]
		aliasList := strings.Join(entry.Aliases, ", ")
		_, _ = fmt.Fprintf(out, "- %s (%s)\n", entry.Canonical, aliasList)
		_, _ = fmt.Fprintf(out, " source: %s\n", entry.Source)
		_, _ = fmt.Fprintf(out, " example: %s\n\n", entry.Example)
	}

	_, _ = fmt.Fprintln(out, "Tip: use 'devour get <language> <keyword> --format markdown' for readable output.")
}
+63
View File
@@ -0,0 +1,63 @@
package cmd
import (
"bytes"
"encoding/json"
"strings"
"testing"
)
// TestLanguagesJSONFormat runs the languages command with the JSON format
// selected and checks that the output decodes, is non-empty, and contains the
// "nextjs" canonical entry. The package-level format flag is restored on exit.
func TestLanguagesJSONFormat(t *testing.T) {
	saved := languagesFormat
	languagesFormat = "json"
	defer func() { languagesFormat = saved }()

	captured := new(bytes.Buffer)
	languagesCmd.SetOut(captured)
	err := runLanguages(languagesCmd, nil)
	if err != nil {
		t.Fatalf("runLanguages returned error: %v", err)
	}

	var payload struct {
		Count     int `json:"count"`
		Languages []struct {
			Canonical string   `json:"canonical"`
			Aliases   []string `json:"aliases"`
		} `json:"languages"`
	}
	err = json.Unmarshal(captured.Bytes(), &payload)
	if err != nil {
		t.Fatalf("invalid json output: %v", err)
	}
	if payload.Count == 0 || len(payload.Languages) == 0 {
		t.Fatalf("expected non-empty languages payload, got %+v", payload)
	}

	for i := range payload.Languages {
		if payload.Languages[i].Canonical == "nextjs" {
			return
		}
	}
	t.Fatalf("expected nextjs in JSON payload, got %+v", payload.Languages)
}
// TestLanguagesTextFormat runs the languages command with the text format
// selected and checks that the listing title appears in the output. The
// package-level format flag is restored on exit.
func TestLanguagesTextFormat(t *testing.T) {
	saved := languagesFormat
	languagesFormat = "text"
	defer func() { languagesFormat = saved }()

	captured := new(bytes.Buffer)
	languagesCmd.SetOut(captured)
	err := runLanguages(languagesCmd, nil)
	if err != nil {
		t.Fatalf("runLanguages returned error: %v", err)
	}

	if text := captured.String(); !strings.Contains(text, "Devour Supported Languages") {
		t.Fatalf("unexpected text output: %q", text)
	}
}
+78 -26
View File
@@ -1,25 +1,32 @@
package cmd
import (
"context"
"fmt"
"net/url"
"os"
"strings"
"github.com/spf13/cobra"
"github.com/yourorg/devour/internal/scraper"
"github.com/yourorg/devour/internal/search"
"github.com/yourorg/devour/internal/storage"
)
var pushCmd = &cobra.Command{
Use: "push <path>",
Short: "Push documents to remote MCP server",
Long: `Push local documents to a remote Devour MCP server.
Short: "Import local documents into Devour storage/index",
Long: `Push local documents into your Devour local workspace.
Useful for:
- Syncing local documentation to a shared server
- Backing up indexed content
- Contributing to a team knowledge base
Current stable behavior:
- local ingest into docs storage
- local reindex for query/ask/status
Remote push is experimental and not enabled by default.
Examples:
devour push ./docs
devour push ./docs --server http://devour.company.com
devour push ./docs --server http://localhost:8080 --project my-project`,
devour push ./docs --project my-project`,
Args: cobra.ExactArgs(1),
RunE: runPush,
}
@@ -30,33 +37,78 @@ var (
)
func init() {
pushCmd.Flags().StringVar(&pushServer, "server", "", "remote Devour server URL")
pushCmd.Flags().StringVarP(&pushProject, "project", "p", "", "project name on remote server")
pushCmd.Flags().StringVar(&pushServer, "server", "", "remote Devour server URL (experimental)")
pushCmd.Flags().StringVarP(&pushProject, "project", "p", "", "project name label")
}
func runPush(cmd *cobra.Command, args []string) error {
path := args[0]
if pushServer == "" {
// Try to get from config
pushServer = "http://localhost:8080"
if _, err := os.Stat(path); err != nil {
return fmt.Errorf("path does not exist: %s", path)
}
fmt.Printf("📤 Pushing to: %s\n", pushServer)
fmt.Printf(" Path: %s\n", path)
if pushProject != "" {
fmt.Printf(" Project: %s\n", pushProject)
cfg, err := loadAppConfig()
if err != nil {
return err
}
// TODO: Implement actual push logic
// 1. Scan path for documents
// 2. Connect to remote server
// 3. Upload documents
// 4. Wait for indexing confirmation
server := strings.TrimSpace(pushServer)
if server != "" && !isLocalServer(server) {
return fmt.Errorf("remote push is experimental and not enabled in this build; use local push without --server")
}
fmt.Println()
fmt.Println("⚠️ Push functionality not yet implemented")
fmt.Println(" Remote server support coming soon")
projectName := strings.TrimSpace(pushProject)
if projectName == "" {
projectName = "local-push"
}
fmt.Printf("📤 Ingesting local docs from: %s\n", path)
fmt.Printf(" Project: %s\n", projectName)
fmt.Printf(" Target docs dir: %s\n", cfg.Storage.DocsDir)
s := scraper.NewScraper(scraper.SourceTypeLocal, toScraperConfig(cfg, 0))
if s == nil {
return fmt.Errorf("local scraper not available")
}
docs, err := s.Scrape(context.Background(), &scraper.Source{
Name: projectName,
Type: scraper.SourceTypeLocal,
Path: path,
Include: []string{`.*`},
})
if err != nil {
return fmt.Errorf("local ingest failed: %w", err)
}
saved, err := storage.SaveDocuments(docs, storage.SaveOptions{
Format: "json",
OutputDir: cfg.Storage.DocsDir,
AllowEmpty: false,
PrintWriter: nil,
})
if err != nil {
return fmt.Errorf("save docs failed: %w", err)
}
engine := search.NewEngine(cfg)
stats, err := engine.Rebuild(context.Background())
if err != nil {
return fmt.Errorf("reindex failed: %w", err)
}
fmt.Println("\n✓ Push complete")
fmt.Printf(" Documents imported: %d\n", saved.Count)
fmt.Printf(" Index docs: %d\n", stats.Documents)
fmt.Printf(" Index path: %s\n", stats.IndexPath)
return nil
}
// isLocalServer reports whether raw refers to a server on the local machine.
//
// raw may be a full URL ("http://localhost:8080") or a bare host with an
// optional port ("localhost:8080"). A host of "localhost", "127.0.0.1" or
// "::1" is local. Input that carries no host at all (for example an empty
// string or a plain path) is also treated as local, since it names no remote
// machine. Input that cannot be parsed is reported as not local.
func isLocalServer(raw string) bool {
	target := raw
	// url.Parse misreads scheme-less input: "example.com" becomes a path and
	// "example.com:8080" becomes scheme "example.com", both with an empty
	// host, while "127.0.0.1:8080" is rejected outright. Prefixing "//" makes
	// the parser read the leading component as the authority instead.
	if !strings.Contains(target, "://") && !strings.HasPrefix(target, "//") {
		target = "//" + target
	}

	u, err := url.Parse(target)
	if err != nil {
		return false
	}

	// Hostname strips the port and the brackets around IPv6 literals.
	switch strings.ToLower(u.Hostname()) {
	case "", "localhost", "127.0.0.1", "::1":
		return true
	default:
		return false
	}
}
+39
View File
@@ -6,6 +6,7 @@ import (
"fmt"
"os"
"path/filepath"
"strings"
"time"
"github.com/spf13/cobra"
@@ -218,6 +219,7 @@ func runQualityScan(cmd *cobra.Command, args []string) error {
if err != nil {
return fmt.Errorf("scan failed: %w", err)
}
result.Findings = quality.AttachDocsEvidence(lang, result.Findings)
return outputScanResult(result, qualityFormat)
}
@@ -256,9 +258,11 @@ func runQualityStatus(cmd *cobra.Command, args []string) error {
return json.NewEncoder(os.Stdout).Encode(scorecard)
case "strict":
fmt.Println(scorer.FormatStrictScorecard(findings, lastScan))
printQualityEvidenceSummary(findings)
return nil
default:
fmt.Println(scorer.FormatScorecard(scorecard))
printQualityEvidenceSummary(findings)
return nil
}
}
@@ -318,6 +322,17 @@ func runQualityNext(cmd *cobra.Command, args []string) error {
fmt.Printf("Score: %d\n", next.Score)
fmt.Printf("ID: %s\n", next.ID)
fmt.Printf("\nDescription:\n%s\n", next.Description)
if next.Metadata != nil {
if urls := strings.TrimSpace(next.Metadata["docs_evidence_urls"]); urls != "" {
fmt.Printf("\nEvidence Docs:\n%s\n", urls)
}
if rationale := strings.TrimSpace(next.Metadata["docs_evidence_rationale"]); rationale != "" {
fmt.Printf("\nRationale:\n%s\n", rationale)
}
if confidence := strings.TrimSpace(next.Metadata["docs_evidence_confidence"]); confidence != "" {
fmt.Printf("Evidence confidence: %s\n", confidence)
}
}
if explain {
fmt.Printf("\nExplanation:\n")
@@ -693,3 +708,27 @@ func importReviewResponses(dataDir string, filename string) error {
return nil
}
// printQualityEvidenceSummary prints how many findings carry a non-blank
// "docs_evidence_urls" metadata value, followed by the location, title and
// URLs of the first such finding. It prints nothing when no finding has
// evidence attached.
func printQualityEvidenceSummary(findings []quality.Finding) {
	evidenceOf := func(f quality.Finding) string {
		if f.Metadata == nil {
			return ""
		}
		return strings.TrimSpace(f.Metadata["docs_evidence_urls"])
	}

	linked := 0
	firstIdx := -1
	firstURLs := ""
	for i := range findings {
		urls := evidenceOf(findings[i])
		if urls == "" {
			continue
		}
		linked++
		if firstIdx < 0 {
			firstIdx, firstURLs = i, urls
		}
	}
	if linked == 0 {
		return
	}

	fmt.Printf("\nEvidence-linked findings: %d/%d\n", linked, len(findings))
	// Only the first evidence-linked finding is shown as a sample.
	sample := findings[firstIdx]
	fmt.Printf(" • %s:%d - %s\n %s\n", filepath.Base(sample.File), sample.Line, sample.Title, firstURLs)
}
+100 -18
View File
@@ -1,9 +1,14 @@
package cmd
import (
"context"
"encoding/json"
"fmt"
"strings"
"github.com/spf13/cobra"
appconfig "github.com/yourorg/devour/internal/config"
"github.com/yourorg/devour/internal/search"
)
var queryCmd = &cobra.Command{
@@ -29,32 +34,109 @@ var (
)
func init() {
queryCmd.Flags().IntVarP(&queryLimit, "limit", "l", 5, "maximum number of results")
queryCmd.Flags().IntVarP(&queryLimit, "limit", "n", 5, "maximum number of results")
queryCmd.Flags().StringVarP(&queryFormat, "format", "f", "text", "output format (text, json, markdown)")
queryCmd.Flags().Float64Var(&queryThreshold, "threshold", 0.7, "similarity threshold (0-1)")
queryCmd.Flags().Float64Var(&queryThreshold, "threshold", 0, "minimum lexical score threshold")
}
func runQuery(cmd *cobra.Command, args []string) error {
query := args[0]
if len(args) > 1 {
query = fmt.Sprintf("%s", args)
query := strings.TrimSpace(strings.Join(args, " "))
if query == "" {
return fmt.Errorf("query cannot be empty")
}
fmt.Printf("Searching: %q\n", query)
fmt.Printf(" Limit: %d\n", queryLimit)
fmt.Printf(" Threshold: %.2f\n", queryThreshold)
fmt.Println()
cfg, err := loadAppConfig()
if err != nil {
return err
}
// TODO: Implement actual query logic
// 1. Generate embedding for query
// 2. Search vector database
// 3. Format and return results
engine := search.NewEngine(cfg)
results, stats, err := engine.Search(context.Background(), query, search.SearchOptions{
Limit: queryLimit,
Threshold: queryThreshold,
})
if err != nil {
return fmt.Errorf("query failed: %w", err)
}
// Placeholder results
fmt.Println("Results:")
fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
fmt.Println("⚠️ Query functionality not yet implemented")
fmt.Println(" Index some documents first with 'devour scrape'")
switch strings.ToLower(queryFormat) {
case "json":
resp := map[string]any{
"query": query,
"limit": queryLimit,
"threshold": queryThreshold,
"count": len(results),
"results": results,
"indexed_at": stats.LastIndexedAt,
"documents": stats.Documents,
}
enc := json.NewEncoder(cmd.OutOrStdout())
enc.SetIndent("", " ")
return enc.Encode(resp)
case "markdown":
return printQueryMarkdown(cmd, query, cfg, results, stats)
case "text":
return printQueryText(cmd, query, cfg, results, stats)
default:
return fmt.Errorf("unsupported format: %s (supported: text, json, markdown)", queryFormat)
}
}
// printQueryText writes query results to the command's output stream as plain
// text: a header with the query, the limit and threshold flag values, the
// indexed document count and the docs directory, then one numbered entry per
// result. It always returns nil.
func printQueryText(cmd *cobra.Command, query string, cfg *appconfig.Config, results []search.Result, stats *search.IndexStats) error {
	w := cmd.OutOrStdout()

	fmt.Fprintf(w, "Searching: %q\n", query)
	fmt.Fprintf(w, " Limit: %d\n", queryLimit)
	fmt.Fprintf(w, " Threshold: %.2f\n", queryThreshold)
	fmt.Fprintf(w, " Indexed docs: %d\n", stats.Documents)
	fmt.Fprintf(w, " Docs dir: %s\n\n", cfg.Storage.DocsDir)

	if len(results) == 0 {
		fmt.Fprintln(w, "No results found.")
		return nil
	}

	fmt.Fprintln(w, "Results:")
	fmt.Fprintln(w, "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
	for idx := range results {
		hit := results[idx]
		fmt.Fprintf(w, "%d. %s\n", idx+1, hit.Title)
		fmt.Fprintf(w, " Score: %.3f | Type: %s | Source: %s\n", hit.Score, hit.Type, defaultSource(hit.Source))
		if hit.URL != "" {
			fmt.Fprintf(w, " URL: %s\n", hit.URL)
		}
		fmt.Fprintf(w, " Snippet: %s\n\n", hit.Snippet)
	}
	return nil
}
// printQueryMarkdown writes query results to the command's output stream as
// Markdown: a title, a bullet list with the query parameters and index
// details, then one second-level section per result. It always returns nil.
func printQueryMarkdown(cmd *cobra.Command, query string, cfg *appconfig.Config, results []search.Result, stats *search.IndexStats) error {
	w := cmd.OutOrStdout()

	fmt.Fprintf(w, "# Query Results\n\n")
	fmt.Fprintf(w, "- Query: `%s`\n", query)
	fmt.Fprintf(w, "- Limit: `%d`\n", queryLimit)
	fmt.Fprintf(w, "- Threshold: `%.2f`\n", queryThreshold)
	fmt.Fprintf(w, "- Indexed docs: `%d`\n", stats.Documents)
	fmt.Fprintf(w, "- Docs dir: `%s`\n\n", cfg.Storage.DocsDir)

	if len(results) == 0 {
		fmt.Fprintln(w, "_No results found._")
		return nil
	}

	for idx := range results {
		hit := results[idx]
		fmt.Fprintf(w, "## %d. %s\n\n", idx+1, hit.Title)
		fmt.Fprintf(w, "- Score: `%.3f`\n", hit.Score)
		fmt.Fprintf(w, "- Type: `%s`\n", hit.Type)
		fmt.Fprintf(w, "- Source: `%s`\n", defaultSource(hit.Source))
		if hit.URL != "" {
			fmt.Fprintf(w, "- URL: %s\n", hit.URL)
		}
		fmt.Fprintf(w, "\n%s\n\n", hit.Snippet)
	}
	return nil
}
// defaultSource returns source with surrounding whitespace removed, or the
// placeholder "unknown" when nothing remains after trimming.
func defaultSource(source string) string {
	if trimmed := strings.TrimSpace(source); trimmed != "" {
		return trimmed
	}
	return "unknown"
}
+2
View File
@@ -6,6 +6,7 @@ import (
"time"
"github.com/yourorg/devour/internal/scraper"
_ "github.com/yourorg/devour/internal/scraper/external"
)
func main() {
@@ -90,6 +91,7 @@ func main() {
scraper.SourceTypeGitHub,
scraper.SourceTypeOpenAPI,
scraper.SourceTypeLocal,
scraper.SourceTypeLocalSearch,
scraper.SourceTypeGoDocs,
scraper.SourceTypeRustDocs,
scraper.SourceTypePythonDocs,
+6 -1
View File
@@ -6,6 +6,7 @@ import (
"github.com/spf13/cobra"
"github.com/spf13/viper"
_ "github.com/yourorg/devour/internal/scraper/external"
"github.com/yourorg/devour/internal/ui"
)
@@ -33,7 +34,8 @@ context to AI models for generating accurate, fully working code.
Runs in two modes:
- Local mode: OpenCode skill running entirely on your machine
- Remote mode: MCP server for multi-user/team access`,
Version: "1.0.0",
Version: "1.0.0",
SilenceUsage: true,
}
func Execute() {
@@ -53,6 +55,7 @@ func init() {
rootCmd.AddCommand(initCmd)
rootCmd.AddCommand(scrapeCmd)
rootCmd.AddCommand(getCmd)
rootCmd.AddCommand(askCmd)
rootCmd.AddCommand(languagesCmd)
rootCmd.AddCommand(demoCmd)
rootCmd.AddCommand(serveCmd)
@@ -62,6 +65,8 @@ func init() {
rootCmd.AddCommand(pushCmd)
rootCmd.AddCommand(logoCmd)
rootCmd.AddCommand(scorecardCmd)
rootCmd.AddCommand(autoCmd)
rootCmd.AddCommand(verifyCmd)
}
// logoCmd displays the Devour character
+31
View File
@@ -0,0 +1,31 @@
package cmd
import "testing"
// TestRootCommandsAreUnique fails if two subcommands registered on rootCmd
// share the same name.
func TestRootCommandsAreUnique(t *testing.T) {
	registered := make(map[string]struct{})
	for _, sub := range rootCmd.Commands() {
		name := sub.Name()
		if _, dup := registered[name]; dup {
			t.Fatalf("duplicate root command registered: %s", name)
		}
		registered[name] = struct{}{}
	}
}
// TestQueryLimitShorthandIsN checks that the query command defines a --limit
// flag whose one-letter shorthand is "n".
func TestQueryLimitShorthandIsN(t *testing.T) {
	limit := queryCmd.Flags().Lookup("limit")
	if limit == nil {
		t.Fatal("query --limit flag not found")
	}
	if got := limit.Shorthand; got != "n" {
		t.Fatalf("expected query --limit shorthand to be n, got %q", got)
	}
}
// TestRootExecuteQueryNoPanic runs "query" through the root command with a
// --limit flag and requires it to complete without returning an error.
func TestRootExecuteQueryNoPanic(t *testing.T) {
	argv := []string{"query", "http client", "--limit", "1"}
	rootCmd.SetArgs(argv)

	_, err := rootCmd.ExecuteC()
	if err == nil {
		return
	}
	t.Fatalf("query execution should not panic; got error: %v", err)
}
+81
View File
@@ -0,0 +1,81 @@
package cmd
import (
"fmt"
"path/filepath"
"strings"
"time"
appconfig "github.com/yourorg/devour/internal/config"
"github.com/yourorg/devour/internal/scraper"
)
// loadAppConfig loads the application configuration from the path held in
// cfgFile and then asks the configuration to ensure its storage directories.
// A load error is returned unchanged; a directory error is wrapped with the
// "ensure storage dirs" context.
func loadAppConfig() (*appconfig.Config, error) {
	loaded, err := appconfig.Load(cfgFile)
	if err != nil {
		return nil, err
	}

	err = loaded.EnsureStorageDirs()
	if err != nil {
		return nil, fmt.Errorf("ensure storage dirs: %w", err)
	}
	return loaded, nil
}
// toScraperConfig builds a scraper.Config from the scraper section of c.
//
// A positive concurrencyOverride replaces the configured concurrency.
// Non-positive values are then replaced by defaults: timeout 30s, retry count
// 3, retry delay 1s, concurrency 10 and max depth 2. User agent, rate limit
// and cache directory are copied as-is.
func toScraperConfig(c *appconfig.Config, concurrencyOverride int) *scraper.Config {
	workers := c.Scraper.Concurrency
	if concurrencyOverride > 0 {
		workers = concurrencyOverride
	}
	if workers <= 0 {
		workers = 10
	}

	timeout := c.Scraper.Timeout
	if timeout <= 0 {
		timeout = 30 * time.Second
	}

	retries := c.Scraper.RetryCount
	if retries <= 0 {
		retries = 3
	}

	retryDelay := c.Scraper.RetryDelay
	if retryDelay <= 0 {
		retryDelay = 1 * time.Second
	}

	depth := c.Scraper.MaxDepth
	if depth <= 0 {
		depth = 2
	}

	return &scraper.Config{
		UserAgent:   c.Scraper.UserAgent,
		Timeout:     timeout,
		RetryCount:  retries,
		RetryDelay:  retryDelay,
		Concurrency: workers,
		RateLimit:   c.Scraper.RateLimit,
		MaxDepth:    depth,
		CacheDir:    c.Scraper.CacheDir,
	}
}
// sourceFromConfig converts a configured source entry into a scraper.Source.
// String fields are trimmed of surrounding whitespace, and the slice fields
// are copied so the result does not share backing arrays with s.
func sourceFromConfig(s appconfig.SourceConfig) *scraper.Source {
	trim := strings.TrimSpace
	clone := func(in []string) []string {
		return append([]string(nil), in...)
	}

	src := new(scraper.Source)
	src.Name = trim(s.Name)
	src.Type = scraper.SourceType(trim(s.Type))
	src.URL = trim(s.URL)
	src.Query = trim(s.Query)
	src.ResultLimit = s.ResultLimit
	src.Domains = clone(s.Domains)
	src.Repo = trim(s.Repo)
	src.Branch = trim(s.Branch)
	src.Path = trim(s.Path)
	src.Include = clone(s.Include)
	src.Exclude = clone(s.Exclude)
	src.Schedule = trim(s.Schedule)
	return src
}
// resolveOutputDir picks the directory documents are written to. A non-blank
// override wins, then a non-blank configured docs directory, and otherwise
// the relative path "devour_data/docs". The chosen value is returned exactly
// as given, without trimming.
func resolveOutputDir(c *appconfig.Config, override string) string {
	switch {
	case strings.TrimSpace(override) != "":
		return override
	case strings.TrimSpace(c.Storage.DocsDir) != "":
		return c.Storage.DocsDir
	default:
		return filepath.Join("devour_data", "docs")
	}
}
-1
View File
@@ -37,7 +37,6 @@ Examples:
}
func init() {
rootCmd.AddCommand(scorecardCmd)
scorecardCmd.Flags().BoolVar(&scorecardCompact, "compact", false, "Generate compact banner only")
scorecardCmd.Flags().BoolVar(&scorecardDetailed, "detailed", false, "Generate detailed banner only")
scorecardCmd.Flags().StringVarP(&scorecardOutput, "output", "o", "lighthouse_scorecard", "Output filename prefix")
+302 -98
View File
@@ -2,17 +2,23 @@ package cmd
import (
"context"
"encoding/json"
"crypto/sha256"
"encoding/hex"
"fmt"
"net/url"
"os"
"path/filepath"
"sort"
"strings"
"time"
"github.com/spf13/cobra"
"github.com/yourorg/devour/internal/markdown"
appconfig "github.com/yourorg/devour/internal/config"
"github.com/yourorg/devour/internal/projectstate"
"github.com/yourorg/devour/internal/scraper"
"github.com/yourorg/devour/internal/search"
"github.com/yourorg/devour/internal/storage"
"gopkg.in/yaml.v3"
)
var scrapeCmd = &cobra.Command{
@@ -34,144 +40,283 @@ Supported source types:
- dockerdocs: Docker (docs.docker.com)
- cloudflaredocs: Cloudflare (developers.cloudflare.com)
- astrodocs: Astro (docs.astro.build)
- localsearch: Self-hosted search API returning JSON results
- url: Generic web pages
- github: GitHub repositories
- openapi: OpenAPI/Swagger specs
- local: Local files/directories
Examples:
devour scrape https://pkg.go.dev/net/http --type godocs
devour scrape https://react.dev/reference/react --type reactdocs
devour scrape https://developers.cloudflare.com/ --type cloudflaredocs
devour scrape http://127.0.0.1:8080/search --type localsearch --search-query "golang http client"
devour scrape --sources sources.yaml`,
Args: cobra.MaximumNArgs(1),
RunE: runScrape,
}
var (
scrapeFormat string
scrapeSources string
scrapeOutput string
scrapeConcurrency int
scrapeType string
scrapeFormat string
scrapeSources string
scrapeOutput string
scrapeConcurrency int
scrapeType string
scrapeSearchQuery string
scrapeSearchLimit int
scrapeSearchDomains []string
scrapeInclude []string
scrapeExclude []string
scrapeAllowEmpty bool
)
func init() {
scrapeCmd.Flags().StringVarP(&scrapeFormat, "format", "f", "json", "output format (json, markdown)")
scrapeCmd.Flags().StringVarP(&scrapeSources, "sources", "s", "", "YAML file with source definitions")
scrapeCmd.Flags().StringVarP(&scrapeOutput, "output", "o", "", "output directory (default: devour_data/docs)")
scrapeCmd.Flags().StringVarP(&scrapeOutput, "output", "o", "", "output directory (default: configured docs dir)")
scrapeCmd.Flags().IntVar(&scrapeConcurrency, "concurrency", 10, "parallel scraping workers")
scrapeCmd.Flags().StringVarP(&scrapeType, "type", "t", "", "source type (auto-detected if not specified)")
scrapeCmd.Flags().StringVar(&scrapeSearchQuery, "search-query", "", "search query for --type localsearch")
scrapeCmd.Flags().IntVar(&scrapeSearchLimit, "search-limit", 8, "max result URLs to scrape for --type localsearch")
scrapeCmd.Flags().StringSliceVar(&scrapeSearchDomains, "search-domain", nil, "restrict localsearch results to these domains (repeatable)")
scrapeCmd.Flags().StringSliceVar(&scrapeInclude, "include", nil, "include URL/file regex patterns (repeatable)")
scrapeCmd.Flags().StringSliceVar(&scrapeExclude, "exclude", nil, "exclude URL/file regex patterns (repeatable)")
scrapeCmd.Flags().BoolVar(&scrapeAllowEmpty, "allow-empty", false, "allow success when no documents were extracted")
}
func runScrape(cmd *cobra.Command, args []string) error {
cfg, err := loadAppConfig()
if err != nil {
return err
}
if scrapeSources != "" {
return scrapeFromConfig(scrapeSources)
return scrapeFromConfig(cmd, cfg, scrapeSources)
}
if len(args) == 0 {
return fmt.Errorf("source argument required when not using --sources flag")
}
sourceURL := args[0]
config := &scraper.Config{
UserAgent: "Devour/1.0 (Documentation Scraper)",
Timeout: 30 * time.Second,
RetryCount: 3,
RetryDelay: 1 * time.Second,
Concurrency: scrapeConcurrency,
}
sourceURL := strings.TrimSpace(args[0])
sourceType := scraper.SourceType(scrapeType)
if sourceType == "" {
sourceType = detectSourceType(sourceURL)
}
fmt.Printf("Scraping: %s\n", sourceURL)
fmt.Printf(" Type: %s\n", sourceType)
fmt.Printf(" Concurrency: %d\n", scrapeConcurrency)
source := &scraper.Source{
Name: extractName(sourceURL),
Type: sourceType,
URL: sourceURL,
Query: strings.TrimSpace(scrapeSearchQuery),
ResultLimit: scrapeSearchLimit,
Domains: append([]string(nil), scrapeSearchDomains...),
Include: append([]string(nil), scrapeInclude...),
Exclude: append([]string(nil), scrapeExclude...),
}
if sourceType == scraper.SourceTypeLocal {
source.Path = sourceURL
}
applySourceProfile(source)
outputDir := resolveOutputDir(cfg, scrapeOutput)
count, err := scrapeOne(cmd, cfg, source, outputDir)
if err != nil {
return err
}
if cfg.Indexing.Enabled {
engine := search.NewEngine(cfg)
if _, err := engine.Rebuild(context.Background()); err != nil {
return fmt.Errorf("reindex after scrape: %w", err)
}
}
fmt.Printf("\n✓ Scraping complete!\n")
fmt.Printf(" Output: %s\n", outputDir)
fmt.Printf(" Documents: %d\n", count)
fmt.Println(" Run 'devour status' to inspect local index health")
return nil
}
// scrapeFromConfig scrapes every source listed in the YAML file at
// configPath and, when indexing is enabled, rebuilds the search index once
// at the end.
//
// The file may be either a bare YAML list of sources or a mapping with a
// top-level "sources" list. Sources are processed in name order. A source
// that fails is reported and counted but does not stop the run; after the
// summary is printed a non-nil error is returned if any source failed.
func scrapeFromConfig(cmd *cobra.Command, cfg *appconfig.Config, configPath string) error {
	raw, err := os.ReadFile(configPath)
	if err != nil {
		return fmt.Errorf("read sources file: %w", err)
	}

	var list []appconfig.SourceConfig
	if listErr := yaml.Unmarshal(raw, &list); listErr != nil || len(list) == 0 {
		// Fall back to the wrapped form: `sources: [...]`.
		var wrapped struct {
			Sources []appconfig.SourceConfig `yaml:"sources"`
		}
		if wrapErr := yaml.Unmarshal(raw, &wrapped); wrapErr != nil {
			// listErr is nil when the bare-list parse succeeded but was
			// empty; wrapping it would print "%!w(<nil>)" and hide the
			// real failure, so report whichever errors actually exist.
			if listErr != nil {
				return fmt.Errorf("parse sources file: %w (as sources mapping: %v)", listErr, wrapErr)
			}
			return fmt.Errorf("parse sources file: %w", wrapErr)
		}
		list = wrapped.Sources
	}
	if len(list) == 0 {
		return fmt.Errorf("sources file contains no sources")
	}

	// Stable sort keeps file order among entries with equal (or empty)
	// names, so runs are deterministic.
	sort.SliceStable(list, func(i, j int) bool {
		return list[i].Name < list[j].Name
	})

	outputDir := resolveOutputDir(cfg, scrapeOutput)
	success := 0
	failures := 0
	totalDocs := 0

	for _, srcCfg := range list {
		source := sourceFromConfig(srcCfg)

		// Fill in a missing type from the URL, or treat a path-only source
		// as local.
		if source.Type == "" {
			if source.URL != "" {
				source.Type = detectSourceType(source.URL)
			} else if source.Path != "" {
				source.Type = scraper.SourceTypeLocal
			}
		}

		// Fill in a missing name from the URL, falling back to the path's
		// base name when the URL yields nothing usable.
		if source.Name == "" {
			source.Name = extractName(source.URL)
			if source.Name == "unknown" && source.Path != "" {
				source.Name = filepath.Base(source.Path)
			}
		}
		applySourceProfile(source)

		fmt.Printf("\n=== Source: %s (%s) ===\n", source.Name, source.Type)
		count, srcErr := scrapeOne(cmd, cfg, source, outputDir)
		if srcErr != nil {
			failures++
			fmt.Printf("✗ %s failed: %v\n", source.Name, srcErr)
			continue
		}
		totalDocs += count
		success++
	}

	if cfg.Indexing.Enabled {
		engine := search.NewEngine(cfg)
		if _, err := engine.Rebuild(context.Background()); err != nil {
			return fmt.Errorf("reindex after scrape sources: %w", err)
		}
	}

	fmt.Printf("\nSummary: %d succeeded, %d failed, %d docs written\n", success, failures, totalDocs)
	if failures > 0 {
		return fmt.Errorf("one or more sources failed")
	}
	return nil
}
func scrapeOne(cmd *cobra.Command, cfg *appconfig.Config, source *scraper.Source, outputDir string) (int, error) {
if source == nil {
return 0, fmt.Errorf("source is required")
}
if source.Type == "" {
return 0, fmt.Errorf("source type is required")
}
if source.Type == scraper.SourceTypeLocalSearch && strings.TrimSpace(source.Query) == "" {
return 0, fmt.Errorf("search query is required for localsearch sources")
}
scraperConfig := toScraperConfig(cfg, scrapeConcurrency)
s := scraper.NewScraper(source.Type, scraperConfig)
if s == nil {
return 0, fmt.Errorf("unsupported source type: %s", source.Type)
}
fmt.Printf("Scraping: %s\n", chooseSourceLabel(source))
fmt.Printf(" Type: %s\n", source.Type)
fmt.Printf(" Concurrency: %d\n", scraperConfig.Concurrency)
if source.Type == scraper.SourceTypeLocalSearch {
fmt.Printf(" Search query: %s\n", source.Query)
fmt.Printf(" Search limit: %d\n", source.ResultLimit)
if len(source.Domains) > 0 {
fmt.Printf(" Search domains: %s\n", strings.Join(source.Domains, ", "))
}
}
fmt.Println()
s := scraper.NewScraper(sourceType, config)
if s == nil {
return fmt.Errorf("unsupported source type: %s", sourceType)
}
source := &scraper.Source{
Name: extractName(sourceURL),
Type: sourceType,
URL: sourceURL,
}
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), scraperConfig.Timeout*2)
defer cancel()
docs, err := s.Scrape(ctx, source)
if err != nil {
return fmt.Errorf("scraping failed: %w", err)
return 0, fmt.Errorf("scraping failed: %w", err)
}
fmt.Printf("✓ Scraped %d documents\n\n", len(docs))
if scrapeOutput == "" {
scrapeOutput = "devour_data/docs"
save, err := storage.SaveDocuments(docs, storage.SaveOptions{
Format: scrapeFormat,
OutputDir: outputDir,
AllowEmpty: scrapeAllowEmpty,
PrintWriter: func(format string, args ...any) {
_, _ = fmt.Printf(format, args...)
},
})
if err != nil {
return 0, err
}
if err := os.MkdirAll(scrapeOutput, 0755); err != nil {
return fmt.Errorf("failed to create output directory: %w", err)
fmt.Printf("✓ Scraped %d documents\n", save.Count)
if err := updateSourceState(cfg, source, docs); err != nil {
return save.Count, fmt.Errorf("update source state: %w", err)
}
for i, doc := range docs {
var filename string
var content []byte
if scrapeFormat == "markdown" {
filename = fmt.Sprintf("%s_%d.md", sanitizeFilename(doc.Title), i)
// Create enhanced markdown document
markdownDoc := &markdown.Document{
ID: doc.ID,
Source: doc.Source,
Type: string(doc.Type),
Title: doc.Title,
Content: doc.Content,
URL: doc.URL,
Metadata: doc.Metadata,
Hash: doc.Hash,
Timestamp: doc.Timestamp,
}
formatter := markdown.NewFormatter()
content = []byte(formatter.FormatWithTOC(markdownDoc))
} else {
filename = fmt.Sprintf("%s_%d.json", sanitizeFilename(doc.Title), i)
content, err = json.MarshalIndent(doc, "", " ")
if err != nil {
return fmt.Errorf("failed to marshal document: %w", err)
}
}
filePath := filepath.Join(scrapeOutput, filename)
if err := os.WriteFile(filePath, content, 0644); err != nil {
return fmt.Errorf("failed to write document: %w", err)
}
fmt.Printf(" 📄 %s (%s)\n", filename, doc.Type)
}
fmt.Printf("\n✓ Scraping complete!\n")
fmt.Printf(" Output: %s\n", scrapeOutput)
fmt.Println(" Run 'devour status' to see indexed documents")
return nil
return save.Count, nil
}
func scrapeFromConfig(configPath string) error {
return fmt.Errorf("scraping from config file not yet implemented")
// updateSourceState records the outcome of a scrape in the persisted source
// state stored under cfg.Storage.MetadataDir.
//
// The entry is keyed by source.Name, or by chooseSourceLabel(source) when the
// name is empty. Its Hash is the hex SHA-256 of one "ID|Hash|URL" line per
// non-nil document, and DocCount is len(docs). Errors from loading or saving
// the state are returned unchanged.
func updateSourceState(cfg *appconfig.Config, source *scraper.Source, docs []*scraper.Document) error {
	metaDir := cfg.Storage.MetadataDir

	state, err := projectstate.LoadSourceState(metaDir)
	if err != nil {
		return err
	}

	// Fingerprint the document set; nil entries contribute nothing.
	digest := sha256.New()
	for i := range docs {
		if doc := docs[i]; doc != nil {
			fmt.Fprintf(digest, "%s|%s|%s\n", doc.ID, doc.Hash, doc.URL)
		}
	}

	stateKey := source.Name
	if stateKey == "" {
		stateKey = chooseSourceLabel(source)
	}

	entry := &projectstate.SourceState{
		Name:     source.Name,
		Type:     string(source.Type),
		URL:      source.URL,
		Hash:     hex.EncodeToString(digest.Sum(nil)),
		LastSync: time.Now(),
		DocCount: len(docs),
	}
	state.Sources[stateKey] = entry

	return projectstate.SaveSourceState(metaDir, state)
}
// chooseSourceLabel returns a display label for a source: the first of URL,
// Path and Repo that is not blank (returned untrimmed), or Name when all
// three are blank.
func chooseSourceLabel(source *scraper.Source) string {
	for _, candidate := range []string{source.URL, source.Path, source.Repo} {
		if strings.TrimSpace(candidate) != "" {
			return candidate
		}
	}
	return source.Name
}
func detectSourceType(sourceURL string) scraper.SourceType {
u, err := url.Parse(sourceURL)
if err != nil {
if sourceURL != "" && !strings.HasPrefix(sourceURL, "http://") && !strings.HasPrefix(sourceURL, "https://") {
return scraper.SourceTypeLocal
}
return scraper.SourceTypeWeb
}
@@ -208,6 +353,11 @@ func detectSourceType(sourceURL string) scraper.SourceType {
return scraper.SourceTypeAstroDocs
case host == "github.com":
return scraper.SourceTypeGitHub
case strings.HasSuffix(path, ".json") || strings.HasSuffix(path, ".yaml") || strings.HasSuffix(path, ".yml"):
if strings.Contains(strings.ToLower(path), "openapi") || strings.Contains(strings.ToLower(path), "swagger") {
return scraper.SourceTypeOpenAPI
}
return scraper.SourceTypeWeb
default:
return scraper.SourceTypeWeb
}
@@ -216,27 +366,81 @@ func detectSourceType(sourceURL string) scraper.SourceType {
func extractName(sourceURL string) string {
u, err := url.Parse(sourceURL)
if err != nil {
if strings.TrimSpace(sourceURL) != "" {
return filepath.Base(sourceURL)
}
return "unknown"
}
parts := strings.Split(strings.Trim(u.Path, "/"), "/")
if len(parts) > 0 {
if len(parts) > 0 && strings.TrimSpace(parts[len(parts)-1]) != "" {
return parts[len(parts)-1]
}
return u.Host
if strings.TrimSpace(u.Host) != "" {
return u.Host
}
return "unknown"
}
func sanitizeFilename(name string) string {
name = strings.ToLower(name)
name = strings.ReplaceAll(name, " ", "_")
name = strings.ReplaceAll(name, "/", "_")
name = strings.ReplaceAll(name, ":", "_")
name = strings.ReplaceAll(name, ".", "_")
if len(name) > 50 {
name = name[:50]
func applySourceProfile(source *scraper.Source) {
if source == nil {
return
}
if source.Type != scraper.SourceTypeWeb && source.Type != scraper.SourceTypeLocalSearch {
return
}
if strings.TrimSpace(source.URL) == "" {
return
}
return name
u, err := url.Parse(source.URL)
if err != nil {
return
}
host := strings.ToLower(u.Host)
if host == "" {
return
}
// Preserve explicit user-provided patterns.
if len(source.Include) > 0 || len(source.Exclude) > 0 {
return
}
switch {
case strings.Contains(host, "learn.microsoft.com"):
source.Include = []string{`/dotnet/`, `/csharp/`, `/base-types/`}
source.Exclude = []string{`/previous-versions/`, `/answers/`, `/support/`, `/training/`, `/events/`, `/products/`}
case strings.Contains(host, "kotlinlang.org"):
source.Include = []string{`/docs/`}
source.Exclude = []string{`/community/`, `/api/`, `/releases/`}
case strings.Contains(host, "php.net"):
source.Include = []string{`/manual/en/`}
source.Exclude = []string{`/manual/(de|fr|es|ja|ru|pt)/`, `/downloads.php`, `/bugs.php`}
case strings.Contains(host, "ruby-doc.org"):
source.Include = []string{`/core/`}
source.Exclude = []string{`/stdlib/`, `/gems/`}
case strings.Contains(host, "hexdocs.pm"):
source.Include = []string{`/elixir/`}
source.Exclude = []string{`/phoenix/`, `/ecto/`}
case strings.Contains(host, "nextjs.org"):
source.Include = []string{`/docs/`}
source.Exclude = []string{`/showcase`, `/blog`, `/learn/`, `/pricing`}
case strings.Contains(host, "svelte.dev"):
source.Include = []string{`/docs/`}
source.Exclude = []string{`/playground`, `/tutorial`, `/blog`}
case strings.Contains(host, "angular.dev"):
source.Include = []string{`/guide/`, `/api/`, `/tutorials/`}
source.Exclude = []string{`/resources/`, `/playground`}
case strings.Contains(host, "remix.run"):
source.Include = []string{`/docs/`}
source.Exclude = []string{`/blog`, `/conf`, `/merch`}
case strings.Contains(host, "solidjs.com"):
source.Include = []string{`/docs/`}
source.Exclude = []string{`/community`, `/showcase`, `/blog`}
case strings.Contains(host, "expressjs.com"):
source.Include = []string{`/en/(guide|api|advanced)/`}
source.Exclude = []string{`/en/starter/`, `/cn/`, `/fr/`, `/es/`, `/de/`}
}
}
+56
View File
@@ -0,0 +1,56 @@
package cmd
import (
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
appconfig "github.com/yourorg/devour/internal/config"
)
// TestScrapeFromConfig runs scrapeFromConfig end to end against a local HTTP
// test server: it writes a one-entry sources file, scrapes it into a
// temporary docs directory and checks that at least one file was produced.
func TestScrapeFromConfig(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/html")
		_, _ = w.Write([]byte("<html><head><title>Docs</title></head><body><main>" + strings.Repeat("docs content ", 30) + "</main></body></html>"))
	}))
	t.Cleanup(srv.Close)

	// Point every storage directory at the per-test temp dir.
	tmp := t.TempDir()
	cfg := appconfig.Default()
	cfg.Storage.DocsDir = filepath.Join(tmp, "docs")
	cfg.Storage.IndexDir = filepath.Join(tmp, "index")
	cfg.Storage.MetadataDir = filepath.Join(tmp, "metadata")
	cfg.Storage.CacheDir = filepath.Join(tmp, "cache")
	if err := cfg.EnsureStorageDirs(); err != nil {
		t.Fatal(err)
	}

	// Keys of a sequence entry must line up with the first key after "- ",
	// i.e. be indented by two spaces; a one-space indent is not valid YAML.
	sourcesPath := filepath.Join(tmp, "sources.yaml")
	sourcesYAML := "- name: demo\n  type: url\n  url: " + srv.URL + "\n"
	if err := os.WriteFile(sourcesPath, []byte(sourcesYAML), 0o644); err != nil {
		t.Fatal(err)
	}

	// The scrape options are package-level flag variables; override them for
	// this test and put the previous values back afterwards.
	oldFormat, oldOutput, oldAllow := scrapeFormat, scrapeOutput, scrapeAllowEmpty
	t.Cleanup(func() {
		scrapeFormat, scrapeOutput, scrapeAllowEmpty = oldFormat, oldOutput, oldAllow
	})
	scrapeFormat = "json"
	scrapeOutput = cfg.Storage.DocsDir
	scrapeAllowEmpty = false

	if err := scrapeFromConfig(nil, cfg, sourcesPath); err != nil {
		t.Fatalf("scrapeFromConfig failed: %v", err)
	}

	entries, err := os.ReadDir(cfg.Storage.DocsDir)
	if err != nil {
		t.Fatal(err)
	}
	if len(entries) == 0 {
		t.Fatal("expected scraped files")
	}
}
+39
View File
@@ -0,0 +1,39 @@
package cmd
import (
"testing"
"github.com/yourorg/devour/internal/scraper"
)
// TestDetectSourceType checks that detectSourceType maps well-known
// documentation hosts to their dedicated source types and falls back to the
// generic web type for an unrecognised host.
func TestDetectSourceType(t *testing.T) {
	type detectCase struct {
		input string
		want  scraper.SourceType
	}

	cases := []detectCase{
		{"https://pkg.go.dev/net/http", scraper.SourceTypeGoDocs},
		{"https://docs.rs/tokio/latest/tokio/", scraper.SourceTypeRustDocs},
		{"https://docs.python.org/3/library/asyncio.html", scraper.SourceTypePythonDocs},
		{"https://docs.oracle.com/javase/8/docs/api/java/util/List.html", scraper.SourceTypeJavaDocs},
		{"https://docs.spring.io/spring-boot/docs/current/reference/htmlsingle/", scraper.SourceTypeSpringDocs},
		{"https://www.typescriptlang.org/docs/handbook/2/basic-types.html", scraper.SourceTypeTSDocs},
		{"https://react.dev/reference/react", scraper.SourceTypeReactDocs},
		{"https://vuejs.org/guide/introduction.html", scraper.SourceTypeVueDocs},
		{"https://nuxt.com/docs/guide/directory-structure", scraper.SourceTypeNuxtDocs},
		{"https://docs.docker.com/compose", scraper.SourceTypeDockerDocs},
		{"https://hub.docker.com/mcp/server/github", scraper.SourceTypeMCPDocs},
		{"https://developers.cloudflare.com/workers", scraper.SourceTypeCloudflareDocs},
		{"https://docs.astro.build/en/guides/components/", scraper.SourceTypeAstroDocs},
		{"https://github.com/yourorg/devour", scraper.SourceTypeGitHub},
		{"https://example.com/docs", scraper.SourceTypeWeb},
	}

	for i := range cases {
		tc := cases[i]
		// Each URL doubles as the subtest name.
		t.Run(tc.input, func(t *testing.T) {
			if got := detectSourceType(tc.input); got != tc.want {
				t.Fatalf("detectSourceType(%q) = %q, want %q", tc.input, got, tc.want)
			}
		})
	}
}
+185 -27
View File
@@ -1,25 +1,29 @@
package cmd
import (
"context"
"encoding/json"
"fmt"
"strings"
"github.com/spf13/cobra"
"github.com/yourorg/devour/internal/projectstate"
"github.com/yourorg/devour/internal/scraper"
"github.com/yourorg/devour/internal/search"
"github.com/yourorg/devour/internal/server"
)
var serveCmd = &cobra.Command{
Use: "serve",
Short: "Start the MCP server",
Long: `Start the Devour MCP server.
Short: "Start the local Devour RPC server",
Long: `Start the Devour RPC server.
In local mode (default), the server communicates via stdio, making it
suitable for use as an OpenCode skill.
In remote mode (--remote flag), the server listens on HTTP and exposes
a REST API for multi-user access.
Local mode (default): JSON-RPC over stdin/stdout for agent/skill integration.
Remote mode (--remote): experimental HTTP RPC endpoint at /rpc.
Examples:
devour serve # Local mode (stdio)
devour serve --remote # Remote mode on default port
devour serve
devour serve --remote
devour serve --remote --port 3000`,
RunE: runServe,
}
@@ -31,31 +35,185 @@ var (
)
func init() {
serveCmd.Flags().BoolVar(&serveRemote, "remote", false, "run as remote HTTP server")
serveCmd.Flags().BoolVar(&serveRemote, "remote", false, "run as remote HTTP server (experimental)")
serveCmd.Flags().IntVarP(&servePort, "port", "p", 8080, "HTTP port (remote mode only)")
serveCmd.Flags().StringVar(&serveHost, "host", "localhost", "HTTP host (remote mode only)")
}
func runServe(cmd *cobra.Command, args []string) error {
if serveRemote {
fmt.Printf("🚀 Starting Devour server in remote mode\n")
fmt.Printf(" Host: %s\n", serveHost)
fmt.Printf(" Port: %d\n", servePort)
fmt.Printf(" URL: http://%s:%d\n", serveHost, servePort)
// TODO: Start HTTP MCP server
return fmt.Errorf("remote mode not yet implemented")
if _, err := loadAppConfig(); err != nil {
return err
}
fmt.Println("🚀 Starting Devour server in local mode (stdio)")
fmt.Println(" Communicating via JSON-RPC over stdin/stdout")
srvCfg := &server.Config{
Mode: "local",
Transport: "stdio",
Host: serveHost,
Port: servePort,
Handler: func(ctx context.Context, method string, params json.RawMessage) (any, error) {
return handleServeMethod(ctx, method, params)
},
}
// TODO: Start stdio MCP server
// Should handle JSON-RPC messages for:
// - devour_query
// - devour_add
// - devour_status
// - devour_sync
if serveRemote {
srvCfg.Mode = "remote"
fmt.Printf("🚀 Starting Devour RPC server in remote experimental mode\n")
fmt.Printf(" URL: http://%s:%d/rpc\n", serveHost, servePort)
} else {
fmt.Println("🚀 Starting Devour RPC server in local mode (stdio)")
fmt.Println(" Protocol: JSON-RPC 2.0 over stdin/stdout")
}
return fmt.Errorf("local mode not yet implemented")
srv := server.NewServer(srvCfg)
return srv.Start(context.Background())
}
// handleServeMethod executes one RPC call and returns its JSON-serialisable
// result.
//
// Supported methods (the name is whitespace-trimmed before matching):
//   - devour_query:  search the local index; params {query, limit, threshold}.
//   - devour_status: document, source and index statistics.
//   - devour_scrape: scrape one source; params {source, type, format, output,
//     query, result_limit, domains, include, exclude}. "source" is required.
//   - devour_ask:    search and return the top snippet as a summary; params
//     {question, limit}. "question" is required; limit defaults to 5.
//   - devour_sync:   run the sync command; params {source, force, rebuild}.
//
// Any other method yields an "unknown method" error. Malformed params are
// reported as an error for every method. The application config is loaded on
// every call.
func handleServeMethod(ctx context.Context, method string, params json.RawMessage) (any, error) {
	// The method implementation needs full typed config. Load per-call to avoid stale state.
	loadedCfg, err := loadAppConfig()
	if err != nil {
		return nil, err
	}

	switch strings.TrimSpace(method) {
	case "devour_query":
		var req struct {
			Query     string  `json:"query"`
			Limit     int     `json:"limit"`
			Threshold float64 `json:"threshold"`
		}
		// Params are optional, but if present they must decode; silently
		// running an empty query on bad input hides client bugs.
		if len(params) > 0 {
			if err := json.Unmarshal(params, &req); err != nil {
				return nil, fmt.Errorf("decode devour_query params: %w", err)
			}
		}
		engine := search.NewEngine(loadedCfg)
		results, stats, err := engine.Search(ctx, req.Query, search.SearchOptions{Limit: req.Limit, Threshold: req.Threshold})
		if err != nil {
			return nil, err
		}
		return map[string]any{"query": req.Query, "count": len(results), "results": results, "indexed": stats.Documents}, nil

	case "devour_status":
		docsStats, err := projectstate.CollectDocsStats(loadedCfg.Storage.DocsDir)
		if err != nil {
			return nil, err
		}

		status := map[string]any{
			"documents":       docsStats.DocumentCount,
			"storage_bytes":   docsStats.StorageBytes,
			"last_updated":    docsStats.LastUpdated,
			"sources":         nil,
			"indexed_docs":    0,
			"index_timestamp": nil,
		}

		// Source state is best-effort: a load failure is reported as "no
		// sources" rather than failing the whole status call, and the
		// returned state is only touched when the load succeeded.
		if state, stateErr := projectstate.LoadSourceState(loadedCfg.Storage.MetadataDir); stateErr == nil {
			status["sources"] = state.Sources
		}

		// Index stats may be nil when indexing fails (runStatus guards the
		// same way), so never dereference them unchecked.
		engine := search.NewEngine(loadedCfg)
		idxStats, idxErr := engine.EnsureIndexed(ctx)
		if idxStats != nil {
			status["indexed_docs"] = idxStats.Documents
			status["index_timestamp"] = idxStats.LastIndexedAt
		}
		if idxErr != nil {
			status["index_error"] = idxErr.Error()
		}
		return status, nil

	case "devour_scrape":
		var req struct {
			Source      string   `json:"source"`
			Type        string   `json:"type"`
			Format      string   `json:"format"`
			Output      string   `json:"output"`
			Query       string   `json:"query"`
			ResultLimit int      `json:"result_limit"`
			Domains     []string `json:"domains"`
			Include     []string `json:"include"`
			Exclude     []string `json:"exclude"`
		}
		if err := json.Unmarshal(params, &req); err != nil {
			return nil, err
		}
		if strings.TrimSpace(req.Source) == "" {
			return nil, fmt.Errorf("source is required")
		}

		st := scraper.SourceType(req.Type)
		if st == "" {
			st = detectSourceType(req.Source)
		}
		source := &scraper.Source{
			Name:        extractName(req.Source),
			Type:        st,
			URL:         req.Source,
			Query:       strings.TrimSpace(req.Query),
			ResultLimit: req.ResultLimit,
			Domains:     append([]string(nil), req.Domains...),
			Include:     append([]string(nil), req.Include...),
			Exclude:     append([]string(nil), req.Exclude...),
		}
		if st == scraper.SourceTypeLocal {
			source.Path = req.Source
		}
		applySourceProfile(source)

		// scrapeOne reads its options from package-level flag variables, so
		// they are overridden for the duration of the call. The deferred
		// restore also runs if scrapeOne panics.
		// NOTE(review): these variables are shared; if the server can run
		// handlers concurrently (not visible here), overlapping calls would
		// race on them — confirm.
		count, err := func() (int, error) {
			prevFormat, prevOutput, prevAllowEmpty := scrapeFormat, scrapeOutput, scrapeAllowEmpty
			defer func() {
				scrapeFormat, scrapeOutput, scrapeAllowEmpty = prevFormat, prevOutput, prevAllowEmpty
			}()
			scrapeFormat = coalesceString(req.Format, "json")
			scrapeOutput = req.Output
			scrapeAllowEmpty = false
			return scrapeOne(nil, loadedCfg, source, resolveOutputDir(loadedCfg, req.Output))
		}()
		if err != nil {
			return nil, err
		}
		return map[string]any{"source": req.Source, "type": st, "documents": count}, nil

	case "devour_ask":
		var req struct {
			Question string `json:"question"`
			Limit    int    `json:"limit"`
		}
		if err := json.Unmarshal(params, &req); err != nil {
			return nil, err
		}
		if strings.TrimSpace(req.Question) == "" {
			return nil, fmt.Errorf("question is required")
		}
		limit := req.Limit
		if limit <= 0 {
			limit = 5
		}
		engine := search.NewEngine(loadedCfg)
		results, _, err := engine.Search(ctx, req.Question, search.SearchOptions{Limit: limit})
		if err != nil {
			return nil, err
		}
		// The summary is simply the snippet of the top-ranked result.
		summary := "No relevant docs found."
		if len(results) > 0 {
			summary = results[0].Snippet
		}
		return map[string]any{"question": req.Question, "summary": summary, "sources": results}, nil

	case "devour_sync":
		var req struct {
			Source  string `json:"source"`
			Force   bool   `json:"force"`
			Rebuild bool   `json:"rebuild"`
		}
		if len(params) > 0 {
			if err := json.Unmarshal(params, &req); err != nil {
				return nil, fmt.Errorf("decode devour_sync params: %w", err)
			}
		}

		// runSync is driven by package-level flag variables; override them
		// for this call and always restore the previous values.
		syncErr := func() error {
			prevForce, prevRebuild, prevSource := syncForce, syncRebuild, syncSource
			defer func() {
				syncForce, syncRebuild, syncSource = prevForce, prevRebuild, prevSource
			}()
			syncForce = req.Force
			syncRebuild = req.Rebuild
			syncSource = req.Source
			return runSync(nil, nil)
		}()
		if syncErr != nil {
			return nil, syncErr
		}
		return map[string]any{"ok": true}, nil

	default:
		return nil, fmt.Errorf("unknown method: %s", method)
	}
}
// coalesceString returns primary unless it is empty or whitespace-only, in
// which case it returns fallback. A non-blank primary is returned exactly as
// given, without trimming.
func coalesceString(primary, fallback string) string {
	if strings.TrimSpace(primary) == "" {
		return fallback
	}
	return primary
}
+86 -17
View File
@@ -1,10 +1,13 @@
package cmd
import (
"context"
"fmt"
"time"
"github.com/spf13/cobra"
"github.com/yourorg/devour/internal/projectstate"
"github.com/yourorg/devour/internal/search"
"github.com/yourorg/devour/internal/ui"
)
@@ -23,39 +26,105 @@ Shows:
}
func runStatus(cmd *cobra.Command, args []string) error {
cfg, err := loadAppConfig()
if err != nil {
return err
}
// Print the small character mascot
ui.PrintCharacterSmall()
fmt.Println()
ui.PrintHeader("Devour Status")
// TODO: Implement actual status check
// Check:
// - Index existence and health
// - Document count
// - Vector count
// - Last sync time
// - Source status
docsStats, err := projectstate.CollectDocsStats(cfg.Storage.DocsDir)
if err != nil {
return err
}
// Placeholder status
ui.PrintKeyValue("Index Health", "⚠️ Not initialized")
ui.PrintKeyValue("Documents", "0 indexed")
ui.PrintKeyValue("Chunks", "0 total")
ui.PrintKeyValue("Vector Dimension", "1536")
ui.PrintKeyValue("Last Updated", "Never")
ui.PrintKeyValue("Storage Used", "0 MB")
engine := search.NewEngine(cfg)
indexStats, indexErr := engine.EnsureIndexed(context.Background())
indexHealth := "✓ Healthy"
if indexErr != nil {
if docsStats.DocumentCount == 0 {
indexHealth = "⚠️ No docs indexed yet"
} else {
indexHealth = "✗ Index error"
}
}
lastUpdated := "Never"
if !docsStats.LastUpdated.IsZero() {
lastUpdated = docsStats.LastUpdated.Format(time.RFC3339)
}
chunks := 0
if indexStats != nil {
chunks = indexStats.Documents
}
ui.PrintKeyValue("Index Health", indexHealth)
ui.PrintKeyValue("Documents", fmt.Sprintf("%d indexed", docsStats.DocumentCount))
ui.PrintKeyValue("Chunks", fmt.Sprintf("%d total", chunks))
ui.PrintKeyValue("Vector Dimension", fmt.Sprintf("%d", cfg.Embeddings.Dimensions))
ui.PrintKeyValue("Last Updated", lastUpdated)
ui.PrintKeyValue("Storage Used", humanSize(docsStats.StorageBytes))
fmt.Println()
ui.PrintSection("Sources")
ui.PrintInfo(" None configured")
state, stateErr := projectstate.LoadSourceState(cfg.Storage.MetadataDir)
if stateErr != nil || len(state.Sources) == 0 {
ui.PrintInfo(" None tracked yet")
} else {
keys := make([]string, 0, len(state.Sources))
for k := range state.Sources {
keys = append(keys, k)
}
sortStrings(keys)
for _, k := range keys {
s := state.Sources[k]
last := "never"
if !s.LastSync.IsZero() {
last = s.LastSync.Format("2006-01-02 15:04:05")
}
fmt.Printf(" • %s (%s): %d docs, last sync %s\n", s.Name, s.Type, s.DocCount, last)
}
}
fmt.Println()
ui.PrintSection("Next Steps")
fmt.Println(" 1. Run 'devour init' to initialize")
fmt.Println(" 2. Run 'devour scrape <source>' to index documents")
if docsStats.DocumentCount == 0 {
fmt.Println(" 1. Run 'devour scrape <source>' to index documentation")
fmt.Println(" 2. Run 'devour query \"<topic>\"' to search indexed docs")
} else {
fmt.Println(" 1. Run 'devour query \"<topic>\"' for local docs search")
fmt.Println(" 2. Run 'devour ask --lang <lang> \"<question>\"' for structured answers")
}
if indexErr != nil {
fmt.Printf(" ⚠️ Index note: %v\n", indexErr)
}
// Show when check happened
fmt.Printf("\nStatus as of: %s\n", time.Now().Format(time.RFC3339))
return nil
}
// humanSize formats a byte count for display.
//
// Values of at least 1 MiB are shown in MB with two decimals, values of at
// least 1 KiB as whole KB (truncated), and anything smaller as plain bytes.
// Showing bytes for the smallest range avoids reporting a small but
// non-empty store as "0 KB".
func humanSize(b int64) string {
	const (
		kb = 1024
		mb = 1024 * kb
	)
	switch {
	case b >= mb:
		return fmt.Sprintf("%.2f MB", float64(b)/float64(mb))
	case b >= kb:
		return fmt.Sprintf("%d KB", b/kb)
	default:
		return fmt.Sprintf("%d B", b)
	}
}
// sortStrings sorts values in place in ascending order using an insertion
// sort. Nil, empty and single-element slices are left untouched.
func sortStrings(values []string) {
	for end := 1; end < len(values); end++ {
		// Lift the next element out and shift larger ones right until its
		// slot is found.
		current := values[end]
		pos := end
		for pos > 0 && current < values[pos-1] {
			values[pos] = values[pos-1]
			pos--
		}
		values[pos] = current
	}
}
+157 -17
View File
@@ -1,9 +1,18 @@
package cmd
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"strings"
"time"
"github.com/spf13/cobra"
"github.com/yourorg/devour/internal/projectstate"
"github.com/yourorg/devour/internal/scraper"
"github.com/yourorg/devour/internal/search"
"github.com/yourorg/devour/internal/storage"
)
var syncCmd = &cobra.Command{
@@ -12,7 +21,7 @@ var syncCmd = &cobra.Command{
Long: `Fetch updates from all configured sources.
Checks each source for changes (using hash or timestamp comparison)
and updates the index accordingly.
and updates the local docs + index accordingly.
Examples:
devour sync # Sync all sources
@@ -34,29 +43,160 @@ func init() {
}
func runSync(cmd *cobra.Command, args []string) error {
cfg, err := loadAppConfig()
if err != nil {
return err
}
if syncRebuild {
fmt.Println("🔄 Rebuilding index from all sources...")
fmt.Println("🔄 Rebuilding local index from configured sources...")
} else {
fmt.Println("🔄 Syncing with configured sources...")
fmt.Println("🔄 Syncing configured sources...")
}
if syncSource != "" {
fmt.Printf(" Source: %s\n", syncSource)
if len(cfg.Sources) == 0 {
fmt.Println("No sources configured. Add sources in devour.yaml first.")
return nil
}
// TODO: Implement actual sync logic
// 1. Load sources from config
// 2. For each source:
// a. Check for changes (hash/timestamp)
// b. If changes detected or --force:
// - Scrape updated content
// - Re-generate embeddings
// - Update index
// 3. Update metadata
state, err := projectstate.LoadSourceState(cfg.Storage.MetadataDir)
if err != nil {
return err
}
fmt.Println()
fmt.Println("⚠️ Sync functionality not yet implemented")
fmt.Println(" Configure sources in devour.yaml first")
updated := 0
skipped := 0
failed := 0
totalDocs := 0
for _, srcCfg := range cfg.Sources {
if syncSource != "" && srcCfg.Name != syncSource {
continue
}
source := sourceFromConfig(srcCfg)
if source.Type == "" {
if source.URL != "" {
source.Type = detectSourceType(source.URL)
} else if source.Path != "" {
source.Type = scraper.SourceTypeLocal
}
}
if source.Name == "" {
source.Name = extractName(source.URL)
}
applySourceProfile(source)
fmt.Printf("\n• %s (%s)\n", source.Name, source.Type)
s := scraper.NewScraper(source.Type, toScraperConfig(cfg, 0))
if s == nil {
failed++
fmt.Printf(" ✗ unsupported source type: %s\n", source.Type)
continue
}
key := source.Name
if key == "" {
key = chooseSourceLabel(source)
}
lastHash := ""
if prev := state.Sources[key]; prev != nil {
lastHash = prev.Hash
}
needsUpdate := syncForce || syncRebuild
newHash := lastHash
if !needsUpdate {
changed, hash, detectErr := s.DetectChanges(context.Background(), source, lastHash)
if detectErr != nil {
fmt.Printf(" ⚠ change detection failed (%v), scraping anyway\n", detectErr)
needsUpdate = true
} else {
needsUpdate = changed
newHash = hash
}
}
if !needsUpdate {
skipped++
fmt.Println(" ✓ no changes")
continue
}
docs, scrapeErr := s.Scrape(context.Background(), source)
if scrapeErr != nil {
failed++
fmt.Printf(" ✗ scrape failed: %v\n", scrapeErr)
state.Sources[key] = &projectstate.SourceState{
Name: source.Name,
Type: string(source.Type),
URL: source.URL,
Hash: lastHash,
LastSync: time.Now(),
DocCount: 0,
LastError: scrapeErr.Error(),
}
continue
}
saved, saveErr := storage.SaveDocuments(docs, storage.SaveOptions{
Format: "json",
OutputDir: cfg.Storage.DocsDir,
AllowEmpty: false,
PrintWriter: nil,
})
if saveErr != nil {
failed++
fmt.Printf(" ✗ save failed: %v\n", saveErr)
continue
}
if newHash == "" {
h := sha256.New()
for _, d := range docs {
if d == nil {
continue
}
fmt.Fprintf(h, "%s|%s|%s\n", d.ID, d.Hash, d.URL)
}
newHash = hex.EncodeToString(h.Sum(nil))
}
state.Sources[key] = &projectstate.SourceState{
Name: source.Name,
Type: string(source.Type),
URL: source.URL,
Hash: newHash,
LastSync: time.Now(),
DocCount: saved.Count,
LastError: "",
}
updated++
totalDocs += saved.Count
fmt.Printf(" ✓ updated (%d docs)\n", saved.Count)
}
if err := projectstate.SaveSourceState(cfg.Storage.MetadataDir, state); err != nil {
return err
}
if syncRebuild || updated > 0 {
engine := search.NewEngine(cfg)
if _, err := engine.Rebuild(context.Background()); err != nil {
return fmt.Errorf("rebuild index: %w", err)
}
}
fmt.Printf("\nSync summary: updated=%d skipped=%d failed=%d docs=%d\n", updated, skipped, failed, totalDocs)
if failed > 0 {
return fmt.Errorf("sync completed with failures")
}
if syncSource != "" && updated == 0 && skipped == 0 && failed == 0 {
return fmt.Errorf("source %q not found in config", syncSource)
}
if strings.TrimSpace(syncSource) != "" {
fmt.Printf("Synced source: %s\n", syncSource)
}
return nil
}
+169
View File
@@ -0,0 +1,169 @@
package cmd
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"time"
"github.com/spf13/cobra"
"github.com/yourorg/devour/internal/scraper"
)
// verifyFormat holds the value of the smoke command's --format flag
// ("text" or "json"); it is bound to the flag in init.
var verifyFormat string

// verifyTimeout holds the value of the smoke command's --timeout flag,
// expressed in seconds per smoke case; it is bound to the flag in init.
var verifyTimeout int
// verifyCmd is the parent "verify" command. It declares no run function of
// its own; it exists to group verification subcommands (see init).
var verifyCmd = &cobra.Command{
	Use:   "verify",
	Short: "Run Devour verification suites",
	Long:  "Run deterministic and live verification suites for Devour commands and scrapers.",
}
// verifySmokeCmd is the "verify smoke" subcommand. Its work is done by
// runVerifySmoke, which scrapes a fixed list of live documentation sources
// and writes a JSON report.
var verifySmokeCmd = &cobra.Command{
	Use:   "smoke",
	Short: "Run live docs scraping smoke checks",
	Long:  "Run an opt-in live network smoke suite and persist a machine-readable report under devour_data/verify/.",
	RunE:  runVerifySmoke,
}
func init() {
verifyCmd.AddCommand(verifySmokeCmd)
verifySmokeCmd.Flags().StringVar(&verifyFormat, "format", "text", "output format (text, json)")
verifySmokeCmd.Flags().IntVar(&verifyTimeout, "timeout", 90, "timeout per smoke case in seconds")
}
// verifyCase is one smoke check: the source to scrape plus the outcome
// recorded for it by runVerifySmoke. It is serialized as part of the JSON
// report, so field order and tags determine the report layout.
type verifyCase struct {
// Name is the human-readable label of the case; also used as the source name.
Name string `json:"name"`
// Type selects which scraper implementation handles the case.
Type scraper.SourceType `json:"type"`
// URL is the live address that is scraped.
URL string `json:"url"`
// Passed is true only when the scrape returned no error and at least one document.
Passed bool `json:"passed"`
// Docs is the number of documents returned by a scrape that did not error.
Docs int `json:"docs"`
// Error holds the failure reason; it is omitted from JSON when empty.
Error string `json:"error,omitempty"`
// TookMs is the elapsed time of the case in milliseconds; it stays 0 when
// no scraper was available for the case.
TookMs int64 `json:"took_ms"`
}
// verifyReport is the machine-readable summary of one smoke run. It is
// written to disk as JSON and, with --format json, also printed.
type verifyReport struct {
// CreatedAt is the time the report was assembled, after all cases ran.
CreatedAt time.Time `json:"created_at"`
// Duration is the total run time of all cases, formatted via time.Duration.String.
Duration string `json:"duration"`
// Passed is the number of cases that passed.
Passed int `json:"passed"`
// Failed is the number of cases that failed.
Failed int `json:"failed"`
// Cases lists every case with its individual outcome.
Cases []verifyCase `json:"cases"`
}
// runVerifySmoke runs the live smoke suite: it scrapes a fixed list of public
// documentation sources, records a pass/fail outcome per case, persists a JSON
// report in a "verify" directory next to the configured docs directory, and
// prints a summary in the format selected by --format.
//
// cmd supplies the output writer and the parent context; args is unused.
//
// It returns an error when the configuration cannot be loaded, when the report
// cannot be encoded or written, or when at least one case failed.
func runVerifySmoke(cmd *cobra.Command, args []string) error {
	cfg, err := loadAppConfig()
	if err != nil {
		return err
	}
	if verifyTimeout <= 0 {
		verifyTimeout = 90
	}
	perCase := time.Duration(verifyTimeout) * time.Second

	// Derive per-case contexts from the command's context so that cancelling
	// the command also aborts an in-flight scrape. Fall back to Background
	// when the command was invoked without a context.
	parent := cmd.Context()
	if parent == nil {
		parent = context.Background()
	}

	cases := []verifyCase{
		{Name: "Go net/http", Type: scraper.SourceTypeGoDocs, URL: "https://pkg.go.dev/net/http"},
		{Name: "Python asyncio", Type: scraper.SourceTypePythonDocs, URL: "https://docs.python.org/3/library/asyncio.html"},
		{Name: "React reference", Type: scraper.SourceTypeReactDocs, URL: "https://react.dev/reference/react"},
		{Name: "TypeScript handbook", Type: scraper.SourceTypeTSDocs, URL: "https://www.typescriptlang.org/docs/handbook/2/basic-types.html"},
		{Name: "Next.js docs", Type: scraper.SourceTypeWeb, URL: "https://nextjs.org/docs"},
		{Name: "Svelte docs", Type: scraper.SourceTypeWeb, URL: "https://svelte.dev/docs/kit"},
		{Name: "Angular guide", Type: scraper.SourceTypeWeb, URL: "https://angular.dev/guide/http"},
		{Name: "Remix docs", Type: scraper.SourceTypeWeb, URL: "https://v2.remix.run/docs"},
		{Name: "Solid docs repo", Type: scraper.SourceTypeGitHub, URL: "https://github.com/solidjs/solid-docs"},
		{Name: "Express guide", Type: scraper.SourceTypeWeb, URL: "https://expressjs.com/en/guide/routing.html"},
	}

	startAll := time.Now()
	passed, failed := 0, 0
	for i := range cases {
		c := &cases[i]
		caseStart := time.Now()

		s := scraper.NewScraper(c.Type, toScraperConfig(cfg, 4))
		if s == nil {
			c.Error = "scraper not available"
			failed++
			continue
		}

		// cancel is called directly rather than deferred: a defer inside the
		// loop would keep every timer alive until the function returns.
		ctx, cancel := context.WithTimeout(parent, perCase)
		docs, err := s.Scrape(ctx, &scraper.Source{Name: c.Name, Type: c.Type, URL: c.URL})
		cancel()
		c.TookMs = time.Since(caseStart).Milliseconds()

		switch {
		case err != nil:
			c.Error = err.Error()
		case len(docs) == 0:
			c.Error = "0 documents"
		default:
			c.Docs = len(docs)
			c.Passed = true
		}
		if c.Passed {
			passed++
		} else {
			failed++
		}
	}

	report := verifyReport{
		CreatedAt: time.Now(),
		Duration:  time.Since(startAll).String(),
		Passed:    passed,
		Failed:    failed,
		Cases:     cases,
	}

	verifyDir := filepath.Join(filepath.Dir(cfg.Storage.DocsDir), "verify")
	reportPath, err := writeSmokeReport(verifyDir, report)
	if err != nil {
		return err
	}

	out := cmd.OutOrStdout()
	switch verifyFormat {
	case "json":
		enc := json.NewEncoder(out)
		enc.SetIndent("", " ")
		if err := enc.Encode(report); err != nil {
			return fmt.Errorf("encode smoke report: %w", err)
		}
	default:
		fmt.Fprintf(out, "Smoke verification complete\n")
		fmt.Fprintf(out, " Passed: %d\n", report.Passed)
		fmt.Fprintf(out, " Failed: %d\n", report.Failed)
		fmt.Fprintf(out, " Report: %s\n", reportPath)
		for _, c := range report.Cases {
			status := "PASS"
			if !c.Passed {
				status = "FAIL"
			}
			fmt.Fprintf(out, " - [%s] %s (%d docs, %dms)", status, c.Name, c.Docs, c.TookMs)
			if c.Error != "" {
				fmt.Fprintf(out, " error=%s", c.Error)
			}
			fmt.Fprintln(out)
		}
	}

	if report.Failed > 0 {
		return fmt.Errorf("smoke verification completed with failures")
	}
	return nil
}

// writeSmokeReport stores report as indented JSON in dir, creating dir when
// needed, and returns the path of the written file. The file name is derived
// from report.CreatedAt so the name and the report's timestamp always agree.
func writeSmokeReport(dir string, report verifyReport) (string, error) {
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return "", fmt.Errorf("create verify directory %q: %w", dir, err)
	}
	b, err := json.MarshalIndent(report, "", " ")
	if err != nil {
		return "", fmt.Errorf("marshal smoke report: %w", err)
	}
	name := fmt.Sprintf("smoke-%s.json", report.CreatedAt.Format("20060102-150405"))
	reportPath := filepath.Join(dir, name)
	if err := os.WriteFile(reportPath, b, 0o644); err != nil {
		return "", fmt.Errorf("write smoke report %q: %w", reportPath, err)
	}
	return reportPath, nil
}