I don't like commits

This commit is contained in:
Tomas Dvorak
2026-02-24 12:10:13 +01:00
parent 898a3c303f
commit 1d72a1cc01
109 changed files with 43586 additions and 8484 deletions
+43
View File
@@ -2,9 +2,11 @@
package ai
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"strings"
@@ -77,6 +79,8 @@ func (e *APIError) Error() string {
return e.Message
}
// maxHTTPErrorBodyBytes caps how many bytes of a response body are read when
// building the error message for a non-success HTTP status.
const maxHTTPErrorBodyBytes = 2048
// Embed generates embeddings for texts.
func (c *OpenAIClient) Embed(ctx context.Context, texts []string) ([][]float32, error) {
if c.config.APIKey == "" {
@@ -145,6 +149,10 @@ func (c *OpenAIClient) embedBatch(ctx context.Context, model string, texts []str
}
defer resp.Body.Close()
if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
return nil, formatHTTPStatusError("embeddings", resp)
}
var embeddingResp EmbeddingResponse
if err := json.NewDecoder(resp.Body).Decode(&embeddingResp); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
@@ -252,6 +260,10 @@ func (c *OpenAIClient) QueryWithContext(ctx context.Context, query string, conte
}
defer resp.Body.Close()
if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
return "", formatHTTPStatusError("chat/completions", resp)
}
var chatResp ChatResponse
if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil {
return "", fmt.Errorf("failed to decode response: %w", err)
@@ -296,3 +308,34 @@ func (c *MockClient) Embed(ctx context.Context, texts []string) ([][]float32, er
// QueryWithContext ignores all of its arguments and always returns the same
// fixed mock response together with a nil error.
func (c *MockClient) QueryWithContext(ctx context.Context, query string, context []string) (string, error) {
	return "This is a mock response.", nil
}
// formatHTTPStatusError builds an error describing the status of resp for the
// named OpenAI endpoint.
//
// At most maxHTTPErrorBodyBytes of resp.Body are read. If that read fails, the
// returned error wraps the read error; otherwise the message ends with the
// text produced by extractHTTPErrorMessage. The body is not closed here.
func formatHTTPStatusError(endpoint string, resp *http.Response) error {
	code := resp.StatusCode
	reason := http.StatusText(code)

	limited := io.LimitReader(resp.Body, maxHTTPErrorBodyBytes)
	raw, readErr := io.ReadAll(limited)
	if readErr != nil {
		return fmt.Errorf("openai %s returned status %d (%s) and body read failed: %w", endpoint, code, reason, readErr)
	}

	detail := extractHTTPErrorMessage(raw)
	return fmt.Errorf("openai %s returned status %d (%s): %s", endpoint, code, reason, detail)
}
// extractHTTPErrorMessage turns an HTTP error body into a short message.
//
// A body that is empty after trimming whitespace yields "<empty body>". If the
// body is JSON with a non-null "error" object whose message is non-blank, the
// trimmed message is returned. In every other case the trimmed body itself is
// returned as a string.
func extractHTTPErrorMessage(body []byte) string {
	content := bytes.TrimSpace(body)
	if len(content) == 0 {
		return "<empty body>"
	}

	var envelope struct {
		Error *APIError `json:"error"`
	}
	// Anything that is not a well-formed error envelope falls back to the raw text.
	if json.Unmarshal(content, &envelope) != nil || envelope.Error == nil {
		return string(content)
	}
	if msg := strings.TrimSpace(envelope.Error.Message); msg != "" {
		return msg
	}
	return string(content)
}
+3 -3
View File
@@ -204,7 +204,7 @@ func Load(explicitPath string) (*Config, error) {
path, err := findConfigPath(explicitPath)
if err != nil {
return nil, err
return nil, fmt.Errorf("resolve config path: %w", err)
}
if path == "" {
cfg.ApplyDefaults()
@@ -328,7 +328,7 @@ func findConfigPath(explicitPath string) (string, error) {
if strings.TrimSpace(explicitPath) != "" {
p, err := filepath.Abs(explicitPath)
if err != nil {
return "", err
return "", fmt.Errorf("resolve absolute config path %q: %w", explicitPath, err)
}
if _, err := os.Stat(p); err != nil {
return "", fmt.Errorf("config file not found: %s", explicitPath)
@@ -361,7 +361,7 @@ func (c *Config) EnsureStorageDirs() error {
continue
}
if err := os.MkdirAll(dir, 0o755); err != nil {
return err
return fmt.Errorf("create storage directory %q: %w", dir, err)
}
}
return nil
-438
View File
@@ -1,438 +0,0 @@
package quality
import (
"fmt"
"sort"
)
// NarrativeGenerator builds a Narrative from findings, a scorecard and scan
// history.
type NarrativeGenerator struct {
	// targetScore is the reference score used by calculateStrictTarget and
	// quoted in the debt_reduction milestone text.
	targetScore int
}
// NewNarrativeGenerator returns a generator aiming at targetScore.
// A zero or negative targetScore is replaced by the default of 95.
func NewNarrativeGenerator(targetScore int) *NarrativeGenerator {
	gen := &NarrativeGenerator{targetScore: 95}
	if targetScore > 0 {
		gen.targetScore = targetScore
	}
	return gen
}
// Generate assembles a complete Narrative by delegating each section to the
// corresponding helper on the generator.
//
// The phase is computed first because the headline, actions, milestone and
// "why now" sections are derived from it; the dimension analysis is computed
// before the strategy because it is passed to generateStrategy.
func (g *NarrativeGenerator) Generate(findings []Finding, scorecard *Scorecard, history []StateSnapshot) *Narrative {
	phase := g.determinePhase(findings, scorecard)
	dims := g.analyzeDimensions(findings)

	out := &Narrative{
		Phase:      phase,
		Dimensions: dims,
	}
	out.Headline = g.generateHeadline(phase, scorecard)
	out.Actions = g.generateActions(findings, phase)
	out.Strategy = g.generateStrategy(findings, dims)
	out.Tools = g.generateTools(findings)
	out.Debt = g.analyzeDebt(findings, scorecard)
	out.StrictTarget = g.calculateStrictTarget(scorecard)
	out.Reminders = g.generateReminders(findings, history)
	out.RiskFlags = g.identifyRisks(findings, history)
	out.Milestone = g.generateMilestone(phase, scorecard)
	out.WhyNow = g.explainWhyNow(phase, findings)
	return out
}
// determinePhase classifies the current state of the codebase from its open
// findings. The scorecard argument is accepted but not consulted.
//
// Result, checked in this order:
//   - "maintenance":    no open findings
//   - "critical":       at least one open T4 finding
//   - "debt_reduction": more than 5 open T3 findings or more than 20 open findings
//   - "cleanup":        more than 5 open findings
//   - "polish":         everything else
func (g *NarrativeGenerator) determinePhase(findings []Finding, scorecard *Scorecard) string {
	var open, major, judgment int
	for i := range findings {
		if findings[i].Status != StatusOpen {
			continue
		}
		open++
		switch findings[i].Severity {
		case SeverityT4:
			major++
		case SeverityT3:
			judgment++
		}
	}

	switch {
	case open == 0:
		return "maintenance"
	case major > 0:
		return "critical"
	case judgment > 5 || open > 20:
		return "debt_reduction"
	case open > 5:
		return "cleanup"
	default:
		return "polish"
	}
}
// generateHeadline returns the one-line summary for the given phase.
//
// "maintenance" uses a fixed sentence and never touches scorecard. "critical"
// quotes scorecard.StrictScore. Every other phase quotes scorecard.TotalScore.
//
// NOTE(review): the non-critical headlines print TotalScore next to the words
// "open issues" / "open" — confirm TotalScore really is an open-issue count.
func (g *NarrativeGenerator) generateHeadline(phase string, scorecard *Scorecard) string {
	if phase == "maintenance" {
		return "Codebase is healthy! Focus on preventing new debt."
	}
	if phase == "critical" {
		return fmt.Sprintf("Critical issues detected (%d strict score). Address T4 findings first.", scorecard.StrictScore)
	}

	// The remaining phases differ only in wording; all report TotalScore.
	var format string
	switch phase {
	case "debt_reduction":
		format = "Significant technical debt (%d open issues). Systematic cleanup recommended."
	case "cleanup":
		format = "Minor issues detected (%d open). Quick wins available."
	default:
		format = "Codebase in good shape (%d open issues)."
	}
	return fmt.Sprintf(format, scorecard.TotalScore)
}
// analyzeDimensions groups the open findings by dimension and reports the
// dimensions with the highest impact.
//
// A dimension's impact is the sum of Score * Severity over its open findings.
// LowestDimensions holds at most five entries, ordered by descending impact;
// dimensions with equal impact are ordered by name so that the result (and the
// top-five cut) does not depend on map iteration order. BiggestGapDimensions
// and StagnantDimensions are always nil: nothing here populates them.
func (g *NarrativeGenerator) analyzeDimensions(findings []Finding) *NarrativeDimensions {
	byDimension := make(map[Dimension][]Finding)
	for _, f := range findings {
		if f.Status == StatusOpen {
			dim := g.classifyDimension(f)
			byDimension[dim] = append(byDimension[dim], f)
		}
	}

	var lowest []*DimensionInfo
	var biggestGap []*DimensionInfo
	var stagnant []*DimensionInfo

	for dim, dimFindings := range byDimension {
		impact := 0
		for _, f := range dimFindings {
			impact += f.Score * int(f.Severity)
		}
		lowest = append(lowest, &DimensionInfo{
			Name:   string(dim),
			Issues: len(dimFindings),
			Impact: float64(impact),
		})
	}

	// sort.Slice is not stable and the entries were appended in random map
	// order, so ties must be broken explicitly to get reproducible output.
	sort.Slice(lowest, func(i, j int) bool {
		if lowest[i].Impact != lowest[j].Impact {
			return lowest[i].Impact > lowest[j].Impact
		}
		return lowest[i].Name < lowest[j].Name
	})

	if len(lowest) > 5 {
		lowest = lowest[:5]
	}

	return &NarrativeDimensions{
		LowestDimensions:     lowest,
		BiggestGapDimensions: biggestGap,
		StagnantDimensions:   stagnant,
	}
}
// classifyDimension maps a finding's Type to the quality dimension it is
// reported under. Types that are not listed fall back to DimensionCodeQuality.
func (g *NarrativeGenerator) classifyDimension(f Finding) Dimension {
	// Code quality doubles as the fallback for unrecognised types.
	dim := DimensionCodeQuality

	switch f.Type {
	case "complexity", "complexity_ast":
		// Already code quality; listed to document the mapping.
	case "duplication", "dupes":
		dim = DimensionDuplication
	case "dead_code", "unused_import", "unused":
		dim = DimensionFileHealth
	case "security":
		dim = DimensionSecurity
	case "naming":
		dim = DimensionNamingQuality
	case "import_cycle", "cycles":
		dim = DimensionAbstractionFit
	}
	return dim
}
// generateActions returns the recommended next steps for the open findings.
//
// One line is produced for each severity tier with at least one open finding,
// always in the order T4, T3, T1, T2. When no tier has open findings, a single
// "no immediate actions" line is returned. The phase argument is not used.
func (g *NarrativeGenerator) generateActions(findings []Finding, phase string) []string {
	var major, judgment, autoFix, quick int
	for i := range findings {
		f := &findings[i]
		if f.Status != StatusOpen {
			continue
		}
		switch f.Severity {
		case SeverityT1:
			autoFix++
		case SeverityT2:
			quick++
		case SeverityT3:
			judgment++
		case SeverityT4:
			major++
		}
	}

	var actions []string
	// add appends the formatted line only for tiers that have open findings.
	add := func(count int, format string) {
		if count > 0 {
			actions = append(actions, fmt.Sprintf(format, count))
		}
	}
	add(major, "Address %d T4 (major refactor) issues - these require architectural changes")
	add(judgment, "Review %d T3 (needs judgment) issues - decide if they need fixing")
	add(autoFix, "Run auto-fixer for %d T1 (auto-fixable) issues")
	add(quick, "Quick manual fixes available for %d T2 issues")

	if len(actions) == 0 {
		return []string{"No immediate actions required - maintain code quality"}
	}
	return actions
}
// generateStrategy summarises how much of the open work is auto-fixable and
// recommends an order of attack.
//
// Coverage is the percentage of open findings that are T1 (0 when nothing is
// open). CanParallelize is true when more than three findings of any status
// were supplied. The dimensions argument is not used.
func (g *NarrativeGenerator) generateStrategy(findings []Finding, dimensions *NarrativeDimensions) *NarrativeStrategy {
	var open, fixable int
	for _, f := range findings {
		if f.Status != StatusOpen {
			continue
		}
		open++
		if f.Severity == SeverityT1 {
			fixable++
		}
	}

	coverage := 0.0
	if open > 0 {
		coverage = float64(fixable) / float64(open) * 100
	}

	leverage := &FixerLeverage{
		AutoFixableCount: fixable,
		TotalCount:       open,
		Coverage:         coverage,
	}
	switch {
	case coverage > 50:
		leverage.Recommendation = "Use auto-fixers first, then address remaining issues manually"
	case fixable > 0:
		leverage.Recommendation = "Start with auto-fixers for quick wins, then prioritize by impact"
	default:
		leverage.Recommendation = "Prioritize by severity and impact, starting with T4 issues"
	}

	return &NarrativeStrategy{
		FixerLeverage:  leverage,
		CanParallelize: len(findings) > 3,
		Hint:           g.generateHint(findings),
	}
}
// generateHint returns a single tip for working through the findings.
//
// Any open T1 finding yields the auto-fix hint; failing that, any open T4
// finding yields the planning hint; otherwise a generic hint is returned.
func (g *NarrativeGenerator) generateHint(findings []Finding) string {
	sawMajor := false
	for _, f := range findings {
		if f.Status != StatusOpen {
			continue
		}
		switch f.Severity {
		case SeverityT1:
			// T1 outranks T4, so the answer is known as soon as one is seen.
			return "T1 issues can be auto-fixed with 'devour quality fix'"
		case SeverityT4:
			sawMajor = true
		}
	}

	if sawMajor {
		return "T4 issues require planning - consider creating a dedicated branch"
	}
	return "Focus on one category at a time for best results"
}
// generateTools lists the tooling relevant to the current findings.
//
// Fixers contains one entry per distinct Type among the open T1 findings, in
// order of first appearance; several findings of the same type share a single
// fixer entry instead of producing duplicates. Fixers is always non-nil. The
// Plan and Badge entries are fixed values.
func (g *NarrativeGenerator) generateTools(findings []Finding) *NarrativeTools {
	fixers := []interface{}{}
	seen := make(map[string]struct{})

	for _, f := range findings {
		if f.Status != StatusOpen || f.Severity != SeverityT1 {
			continue
		}
		// One fixer handles every finding of its type, so list it only once.
		if _, dup := seen[f.Type]; dup {
			continue
		}
		seen[f.Type] = struct{}{}

		fixers = append(fixers, map[string]string{
			"name":        f.Type,
			"description": fmt.Sprintf("Fix %s issues", f.Type),
		})
	}

	return &NarrativeTools{
		Fixers: fixers,
		Plan: &PlanTool{
			Command:     "devour quality plan",
			Description: "Generate prioritized action plan",
		},
		Badge: &BadgeTool{
			Generated: true,
			InReadme:  false,
			Path:      "scorecard.png",
		},
	}
}
// analyzeDebt summarises the outstanding debt.
//
// WontfixCount counts findings marked wontfix. WorstDimension is the dimension
// whose open findings have the largest summed Score * Severity, and WorstGap
// is that sum; when several dimensions tie, the lexicographically smallest
// name wins so the result does not depend on map iteration order. If no
// dimension has a positive impact, WorstDimension stays empty and WorstGap is
// 0. OverallGap is scorecard.StrictScore and Trend is always "stable".
func (g *NarrativeGenerator) analyzeDebt(findings []Finding, scorecard *Scorecard) *NarrativeDebt {
	wontfixCount := 0
	dimensionImpact := make(map[string]float64)

	for _, f := range findings {
		switch f.Status {
		case StatusWontfix:
			wontfixCount++
		case StatusOpen:
			dim := string(g.classifyDimension(f))
			dimensionImpact[dim] += float64(f.Score * int(f.Severity))
		}
	}

	var worstDimension string
	var worstGap float64
	for dim, impact := range dimensionImpact {
		switch {
		case impact > worstGap:
			worstGap, worstDimension = impact, dim
		case impact == worstGap && worstDimension != "" && dim < worstDimension:
			// Tie on impact: prefer the smaller name for reproducible output.
			worstDimension = dim
		}
	}

	return &NarrativeDebt{
		OverallGap:     float64(scorecard.StrictScore),
		WontfixCount:   wontfixCount,
		WorstDimension: worstDimension,
		WorstGap:       worstGap,
		Trend:          "stable",
	}
}
// calculateStrictTarget compares the scorecard's strict score with the
// generator's target score.
//
// The Gap field holds the strict score expressed as a percentage of the target
// (not a difference). State is "at_target" at 100% or more, "near_target" from
// 80%, "in_progress" from 50% and "needs_work" below that; only the last two
// states carry a Warning.
func (g *NarrativeGenerator) calculateStrictTarget(scorecard *Scorecard) *StrictTarget {
	current := float64(scorecard.StrictScore)
	target := float64(g.targetScore)
	ratio := current / target * 100

	result := &StrictTarget{
		Target:  target,
		Current: current,
		Gap:     ratio,
	}

	if ratio >= 100 {
		result.State = "at_target"
		return result
	}
	if ratio >= 80 {
		result.State = "near_target"
		return result
	}

	// Below 80% of the target a warning accompanies the state.
	var message string
	if ratio >= 50 {
		result.State = "in_progress"
		message = "Significant gap to target - consider focused effort"
	} else {
		result.State = "needs_work"
		message = "Large gap to target - prioritize high-impact fixes"
	}
	result.Warning = &message
	return result
}
// generateReminders returns short nudges for the user.
//
// A reminder is added when open T1 findings exist, and another when the most
// recent history snapshot recorded exactly as many findings as were passed in.
// The result is nil when neither applies.
func (g *NarrativeGenerator) generateReminders(findings []Finding, history []StateSnapshot) []string {
	fixable := 0
	for i := range findings {
		if findings[i].Status == StatusOpen && findings[i].Severity == SeverityT1 {
			fixable++
		}
	}

	var reminders []string
	if fixable > 0 {
		reminders = append(reminders, fmt.Sprintf("%d auto-fixable issues available - use 'devour quality fix'", fixable))
	}

	// Compare the total finding count against the latest snapshot only.
	if n := len(history); n > 0 && history[n-1].Findings == len(findings) {
		reminders = append(reminders, "No progress since last scan - consider tackling a specific category")
	}
	return reminders
}
// identifyRisks flags conditions that suggest debt is getting out of hand.
//
// A risk is reported when more than three open T4 findings exist, and another
// when at least three snapshots are available and the current number of
// findings exceeds 120% of the integer average of the last three snapshots.
// The result is nil when neither applies.
func (g *NarrativeGenerator) identifyRisks(findings []Finding, history []StateSnapshot) []string {
	major := 0
	for i := range findings {
		if findings[i].Status == StatusOpen && findings[i].Severity == SeverityT4 {
			major++
		}
	}

	var risks []string
	if major > 3 {
		risks = append(risks, fmt.Sprintf("High number of T4 issues (%d) indicates architectural debt", major))
	}

	if n := len(history); n >= 3 {
		recent := history[n-3:]
		sum := recent[0].Findings + recent[1].Findings + recent[2].Findings
		// Integer division, then the 1.2 factor is truncated back to int.
		mean := sum / 3
		threshold := int(float64(mean) * 1.2)
		if len(findings) > threshold {
			risks = append(risks, "Finding count is trending upward - debt is accumulating")
		}
	}
	return risks
}
// generateMilestone returns the next goal for the given phase. Only the
// "debt_reduction" milestone is parameterised (with the generator's target
// score); unknown phases get a generic milestone. The scorecard is not used.
func (g *NarrativeGenerator) generateMilestone(phase string, scorecard *Scorecard) string {
	if phase == "debt_reduction" {
		return fmt.Sprintf("Reduce strict score below %d", g.targetScore)
	}
	if phase == "maintenance" {
		return "Maintain current quality level"
	}
	if phase == "critical" {
		return "Reduce T4 issues to zero"
	}
	if phase == "cleanup" {
		return "Clear all T1 and T2 issues"
	}
	return "Continue quality improvement"
}
// explainWhyNow gives the motivation for acting on the findings now.
//
// Any open T4 finding takes precedence; otherwise more than five open T1
// findings trigger the quick-wins message; otherwise a generic message is
// returned. The phase argument is not used.
func (g *NarrativeGenerator) explainWhyNow(phase string, findings []Finding) string {
	quickWins := 0
	for _, f := range findings {
		if f.Status != StatusOpen {
			continue
		}
		switch f.Severity {
		case SeverityT4:
			// A single open T4 decides the answer regardless of anything else.
			return "T4 issues compound over time - addressing them early prevents architectural decay"
		case SeverityT1:
			quickWins++
		}
	}

	if quickWins > 5 {
		return "Quick wins available - auto-fixers can clear low-hanging fruit in minutes"
	}
	return "Consistent small improvements compound into significant quality gains"
}
-754
View File
@@ -1,754 +0,0 @@
package quality
import (
"testing"
"time"
)
// TestNewNarrativeGenerator checks that the constructor keeps positive target
// scores and falls back to 95 for zero or negative ones.
func TestNewNarrativeGenerator(t *testing.T) {
	type testCase struct {
		name        string
		targetScore int
		expected    int
	}
	cases := []testCase{
		{name: "default target", targetScore: 0, expected: 95},
		{name: "custom target", targetScore: 85, expected: 85},
		{name: "negative target", targetScore: -10, expected: 95},
		{name: "zero target", targetScore: 0, expected: 95},
		{name: "high target", targetScore: 100, expected: 100},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := NewNarrativeGenerator(tc.targetScore).targetScore
			if got != tc.expected {
				t.Errorf("NewNarrativeGenerator() targetScore = %v, want %v", got, tc.expected)
			}
		})
	}
}
func TestNarrativeGenerator_determinePhase(t *testing.T) {
gen := NewNarrativeGenerator(95)
tests := []struct {
name string
findings []Finding
expected string
}{
{
name: "no open issues",
findings: []Finding{{Status: StatusFixed}},
expected: "maintenance",
},
{
name: "critical phase with T4",
findings: []Finding{
{Status: StatusOpen, Severity: SeverityT4},
},
expected: "critical",
},
{
name: "debt reduction with many T3",
findings: []Finding{
{Status: StatusOpen, Severity: SeverityT3},
{Status: StatusOpen, Severity: SeverityT3},
{Status: StatusOpen, Severity: SeverityT3},
{Status: StatusOpen, Severity: SeverityT3},
{Status: StatusOpen, Severity: SeverityT3},
{Status: StatusOpen, Severity: SeverityT3},
},
expected: "debt_reduction",
},
{
name: "debt reduction with many open issues",
findings: func() []Finding {
var f []Finding
for i := 0; i < 25; i++ {
f = append(f, Finding{Status: StatusOpen, Severity: SeverityT2})
}
return f
}(),
expected: "debt_reduction",
},
{
name: "cleanup phase",
findings: func() []Finding {
var f []Finding
for i := 0; i < 10; i++ {
f = append(f, Finding{Status: StatusOpen, Severity: SeverityT2})
}
return f
}(),
expected: "cleanup",
},
{
name: "polish phase",
findings: []Finding{
{Status: StatusOpen, Severity: SeverityT2},
{Status: StatusOpen, Severity: SeverityT2},
},
expected: "polish",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
scorecard := &Scorecard{TotalScore: 100}
phase := gen.determinePhase(tt.findings, scorecard)
if phase != tt.expected {
t.Errorf("determinePhase() = %v, want %v", phase, tt.expected)
}
})
}
}
func TestNarrativeGenerator_generateHeadline(t *testing.T) {
gen := NewNarrativeGenerator(95)
tests := []struct {
name string
phase string
scorecard *Scorecard
expected string
}{
{
name: "maintenance phase",
phase: "maintenance",
scorecard: &Scorecard{StrictScore: 50},
expected: "Codebase is healthy! Focus on preventing new debt.",
},
{
name: "critical phase",
phase: "critical",
scorecard: &Scorecard{StrictScore: 150},
expected: "Critical issues detected (150 strict score). Address T4 findings first.",
},
{
name: "debt reduction phase",
phase: "debt_reduction",
scorecard: &Scorecard{TotalScore: 200},
expected: "Significant technical debt (200 open issues). Systematic cleanup recommended.",
},
{
name: "cleanup phase",
phase: "cleanup",
scorecard: &Scorecard{TotalScore: 15},
expected: "Minor issues detected (15 open). Quick wins available.",
},
{
name: "polish phase",
phase: "polish",
scorecard: &Scorecard{TotalScore: 3},
expected: "Codebase in good shape (3 open issues).",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
headline := gen.generateHeadline(tt.phase, tt.scorecard)
if headline != tt.expected {
t.Errorf("generateHeadline() = %v, want %v", headline, tt.expected)
}
})
}
}
func TestNarrativeGenerator_classifyDimension(t *testing.T) {
gen := NewNarrativeGenerator(95)
tests := []struct {
name string
finding Finding
expected Dimension
}{
{
name: "complexity",
finding: Finding{Type: "complexity"},
expected: DimensionCodeQuality,
},
{
name: "complexity_ast",
finding: Finding{Type: "complexity_ast"},
expected: DimensionCodeQuality,
},
{
name: "duplication",
finding: Finding{Type: "duplication"},
expected: DimensionDuplication,
},
{
name: "dead_code",
finding: Finding{Type: "dead_code"},
expected: DimensionFileHealth,
},
{
name: "security",
finding: Finding{Type: "security"},
expected: DimensionSecurity,
},
{
name: "naming",
finding: Finding{Type: "naming"},
expected: DimensionNamingQuality,
},
{
name: "import_cycle",
finding: Finding{Type: "import_cycle"},
expected: DimensionAbstractionFit,
},
{
name: "unknown type",
finding: Finding{Type: "unknown"},
expected: DimensionCodeQuality,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
dimension := gen.classifyDimension(tt.finding)
if dimension != tt.expected {
t.Errorf("classifyDimension() = %v, want %v", dimension, tt.expected)
}
})
}
}
func TestNarrativeGenerator_generateActions(t *testing.T) {
gen := NewNarrativeGenerator(95)
tests := []struct {
name string
findings []Finding
phase string
expected []string
}{
{
name: "mixed severities",
findings: []Finding{
{Status: StatusOpen, Severity: SeverityT4},
{Status: StatusOpen, Severity: SeverityT3},
{Status: StatusOpen, Severity: SeverityT2},
{Status: StatusOpen, Severity: SeverityT1},
},
phase: "critical",
expected: []string{
"Address 1 T4 (major refactor) issues - these require architectural changes",
"Review 1 T3 (needs judgment) issues - decide if they need fixing",
"Run auto-fixer for 1 T1 (auto-fixable) issues",
"Quick manual fixes available for 1 T2 issues",
},
},
{
name: "no open issues",
findings: []Finding{{Status: StatusFixed}},
phase: "maintenance",
expected: []string{"No immediate actions required - maintain code quality"},
},
{
name: "only T1 issues",
findings: []Finding{
{Status: StatusOpen, Severity: SeverityT1},
{Status: StatusOpen, Severity: SeverityT1},
},
phase: "polish",
expected: []string{
"Run auto-fixer for 2 T1 (auto-fixable) issues",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
actions := gen.generateActions(tt.findings, tt.phase)
if len(actions) != len(tt.expected) {
t.Errorf("generateActions() length = %v, want %v", len(actions), len(tt.expected))
}
for i, action := range actions {
if i < len(tt.expected) && action != tt.expected[i] {
t.Errorf("generateActions()[%d] = %v, want %v", i, action, tt.expected[i])
}
}
})
}
}
func TestNarrativeGenerator_generateStrategy(t *testing.T) {
gen := NewNarrativeGenerator(95)
tests := []struct {
name string
findings []Finding
expected string
parallel bool
}{
{
name: "high auto-fixable coverage",
findings: []Finding{
{Status: StatusOpen, Severity: SeverityT1},
{Status: StatusOpen, Severity: SeverityT1},
{Status: StatusOpen, Severity: SeverityT2},
},
expected: "Use auto-fixers first, then address remaining issues manually",
parallel: false,
},
{
name: "some auto-fixable",
findings: []Finding{
{Status: StatusOpen, Severity: SeverityT1},
{Status: StatusOpen, Severity: SeverityT3},
{Status: StatusOpen, Severity: SeverityT4},
},
expected: "Start with auto-fixers for quick wins, then prioritize by impact",
parallel: false,
},
{
name: "no auto-fixable",
findings: []Finding{
{Status: StatusOpen, Severity: SeverityT3},
{Status: StatusOpen, Severity: SeverityT4},
},
expected: "Prioritize by severity and impact, starting with T4 issues",
parallel: false,
},
{
name: "no findings",
findings: []Finding{},
expected: "Prioritize by severity and impact, starting with T4 issues",
parallel: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
dimensions := &NarrativeDimensions{}
strategy := gen.generateStrategy(tt.findings, dimensions)
if strategy.FixerLeverage.Recommendation != tt.expected {
t.Errorf("generateStrategy() recommendation = %v, want %v", strategy.FixerLeverage.Recommendation, tt.expected)
}
if strategy.CanParallelize != tt.parallel {
t.Errorf("generateStrategy() CanParallelize = %v, want %v", strategy.CanParallelize, tt.parallel)
}
})
}
}
func TestNarrativeGenerator_generateHint(t *testing.T) {
gen := NewNarrativeGenerator(95)
tests := []struct {
name string
findings []Finding
expected string
}{
{
name: "has T1 issues",
findings: []Finding{
{Status: StatusOpen, Severity: SeverityT1},
{Status: StatusOpen, Severity: SeverityT2},
},
expected: "T1 issues can be auto-fixed with 'devour quality fix'",
},
{
name: "has T4 issues but no T1",
findings: []Finding{
{Status: StatusOpen, Severity: SeverityT4},
{Status: StatusOpen, Severity: SeverityT3},
},
expected: "T4 issues require planning - consider creating a dedicated branch",
},
{
name: "no T1 or T4 issues",
findings: []Finding{
{Status: StatusOpen, Severity: SeverityT2},
{Status: StatusOpen, Severity: SeverityT3},
},
expected: "Focus on one category at a time for best results",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
hint := gen.generateHint(tt.findings)
if hint != tt.expected {
t.Errorf("generateHint() = %v, want %v", hint, tt.expected)
}
})
}
}
func TestNarrativeGenerator_generateTools(t *testing.T) {
gen := NewNarrativeGenerator(95)
findings := []Finding{
{Status: StatusOpen, Severity: SeverityT1, Type: "dead_code"},
{Status: StatusOpen, Severity: SeverityT2, Type: "naming"},
}
tools := gen.generateTools(findings)
if tools.Plan.Command != "devour quality plan" {
t.Errorf("generateTools() Plan.Command = %v, want %v", tools.Plan.Command, "devour quality plan")
}
if !tools.Badge.Generated {
t.Error("generateTools() Badge.Generated should be true")
}
if len(tools.Fixers) != 1 {
t.Errorf("generateTools() Fixers length = %v, want 1", len(tools.Fixers))
}
}
func TestNarrativeGenerator_analyzeDebt(t *testing.T) {
gen := NewNarrativeGenerator(95)
findings := []Finding{
{Status: StatusOpen, Severity: SeverityT4, Type: "security", Score: 10},
{Status: StatusWontfix, Severity: SeverityT2, Type: "naming", Score: 5},
{Status: StatusOpen, Severity: SeverityT3, Type: "complexity", Score: 8},
}
scorecard := &Scorecard{StrictScore: 150}
debt := gen.analyzeDebt(findings, scorecard)
if debt.WontfixCount != 1 {
t.Errorf("analyzeDebt() WontfixCount = %v, want 1", debt.WontfixCount)
}
if debt.OverallGap != 150.0 {
t.Errorf("analyzeDebt() OverallGap = %v, want 150.0", debt.OverallGap)
}
if debt.WorstDimension != "Security" {
t.Errorf("analyzeDebt() WorstDimension = %v, want Security", debt.WorstDimension)
}
}
func TestNarrativeGenerator_calculateStrictTarget(t *testing.T) {
gen := NewNarrativeGenerator(100)
tests := []struct {
name string
scorecard *Scorecard
expected string
hasWarning bool
}{
{
name: "at target",
scorecard: &Scorecard{StrictScore: 100},
expected: "at_target",
hasWarning: false,
},
{
name: "near target",
scorecard: &Scorecard{StrictScore: 85},
expected: "near_target",
hasWarning: false,
},
{
name: "in progress",
scorecard: &Scorecard{StrictScore: 60},
expected: "in_progress",
hasWarning: true,
},
{
name: "needs work",
scorecard: &Scorecard{StrictScore: 30},
expected: "needs_work",
hasWarning: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
target := gen.calculateStrictTarget(tt.scorecard)
if target.State != tt.expected {
t.Errorf("calculateStrictTarget() State = %v, want %v", target.State, tt.expected)
}
if (target.Warning != nil) != tt.hasWarning {
t.Errorf("calculateStrictTarget() Warning presence = %v, want %v", target.Warning != nil, tt.hasWarning)
}
})
}
}
func TestNarrativeGenerator_generateReminders(t *testing.T) {
gen := NewNarrativeGenerator(95)
tests := []struct {
name string
findings []Finding
history []StateSnapshot
expected []string
}{
{
name: "auto-fixable available",
findings: []Finding{
{Status: StatusOpen, Severity: SeverityT1},
{Status: StatusOpen, Severity: SeverityT1},
},
history: []StateSnapshot{},
expected: []string{
"2 auto-fixable issues available - use 'devour quality fix'",
},
},
{
name: "no progress",
findings: []Finding{{Status: StatusOpen, Severity: SeverityT2}},
history: []StateSnapshot{{Findings: 1, Timestamp: time.Now()}},
expected: []string{
"No progress since last scan - consider tackling a specific category",
},
},
{
name: "no reminders",
findings: []Finding{{Status: StatusOpen, Severity: SeverityT3}},
history: []StateSnapshot{},
expected: []string{},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
reminders := gen.generateReminders(tt.findings, tt.history)
if len(reminders) != len(tt.expected) {
t.Errorf("generateReminders() length = %v, want %v", len(reminders), len(tt.expected))
}
for i, reminder := range reminders {
if i < len(tt.expected) && reminder != tt.expected[i] {
t.Errorf("generateReminders()[%d] = %v, want %v", i, reminder, tt.expected[i])
}
}
})
}
}
func TestNarrativeGenerator_identifyRisks(t *testing.T) {
gen := NewNarrativeGenerator(95)
tests := []struct {
name string
findings []Finding
history []StateSnapshot
expected []string
}{
{
name: "high T4 count",
findings: func() []Finding {
var f []Finding
for i := 0; i < 5; i++ {
f = append(f, Finding{Status: StatusOpen, Severity: SeverityT4})
}
return f
}(),
history: []StateSnapshot{},
expected: []string{
"High number of T4 issues (5) indicates architectural debt",
},
},
{
name: "upward trend",
findings: func() []Finding {
var f []Finding
for i := 0; i < 25; i++ {
f = append(f, Finding{Status: StatusOpen, Severity: SeverityT2})
}
return f
}(),
history: []StateSnapshot{
{Findings: 10, Timestamp: time.Now().Add(-3 * time.Hour)},
{Findings: 12, Timestamp: time.Now().Add(-2 * time.Hour)},
{Findings: 15, Timestamp: time.Now().Add(-1 * time.Hour)},
},
expected: []string{
"Finding count is trending upward - debt is accumulating",
},
},
{
name: "no risks",
findings: []Finding{{Status: StatusOpen, Severity: SeverityT2}},
history: []StateSnapshot{},
expected: []string{},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
risks := gen.identifyRisks(tt.findings, tt.history)
if len(risks) != len(tt.expected) {
t.Errorf("identifyRisks() length = %v, want %v", len(risks), len(tt.expected))
}
for i, risk := range risks {
if i < len(tt.expected) && risk != tt.expected[i] {
t.Errorf("identifyRisks()[%d] = %v, want %v", i, risk, tt.expected[i])
}
}
})
}
}
func TestNarrativeGenerator_generateMilestone(t *testing.T) {
gen := NewNarrativeGenerator(95)
tests := []struct {
name string
phase string
scorecard *Scorecard
expected string
}{
{
name: "maintenance",
phase: "maintenance",
scorecard: &Scorecard{},
expected: "Maintain current quality level",
},
{
name: "critical",
phase: "critical",
scorecard: &Scorecard{},
expected: "Reduce T4 issues to zero",
},
{
name: "debt reduction",
phase: "debt_reduction",
scorecard: &Scorecard{},
expected: "Reduce strict score below 95",
},
{
name: "cleanup",
phase: "cleanup",
scorecard: &Scorecard{},
expected: "Clear all T1 and T2 issues",
},
{
name: "polish",
phase: "polish",
scorecard: &Scorecard{},
expected: "Continue quality improvement",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
milestone := gen.generateMilestone(tt.phase, tt.scorecard)
if milestone != tt.expected {
t.Errorf("generateMilestone() = %v, want %v", milestone, tt.expected)
}
})
}
}
func TestNarrativeGenerator_explainWhyNow(t *testing.T) {
gen := NewNarrativeGenerator(95)
tests := []struct {
name string
phase string
findings []Finding
expected string
}{
{
name: "has T4 issues",
findings: []Finding{
{Status: StatusOpen, Severity: SeverityT4},
},
expected: "T4 issues compound over time - addressing them early prevents architectural decay",
},
{
name: "many T1 issues",
findings: func() []Finding {
var f []Finding
for i := 0; i < 6; i++ {
f = append(f, Finding{Status: StatusOpen, Severity: SeverityT1})
}
return f
}(),
expected: "Quick wins available - auto-fixers can clear low-hanging fruit in minutes",
},
{
name: "few T1 issues",
findings: []Finding{
{Status: StatusOpen, Severity: SeverityT1},
{Status: StatusOpen, Severity: SeverityT2},
},
expected: "Consistent small improvements compound into significant quality gains",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
whyNow := gen.explainWhyNow(tt.phase, tt.findings)
if whyNow != tt.expected {
t.Errorf("explainWhyNow() = %v, want %v", whyNow, tt.expected)
}
})
}
}
// TestNarrativeGenerator_Generate is a smoke test: for a small set of open
// findings it verifies that the listed sections of the narrative are populated.
func TestNarrativeGenerator_Generate(t *testing.T) {
	gen := NewNarrativeGenerator(95)

	findings := []Finding{
		{Status: StatusOpen, Severity: SeverityT2, Type: "naming", Score: 5},
		{Status: StatusOpen, Severity: SeverityT1, Type: "dead_code", Score: 3},
	}
	scorecard := &Scorecard{
		TotalScore:  8,
		StrictScore: 15,
		TargetScore: 95,
		LastScan:    time.Now(),
	}
	history := []StateSnapshot{
		{Findings: 10, Timestamp: time.Now().Add(-1 * time.Hour)},
	}

	narrative := gen.Generate(findings, scorecard, history)

	// Each entry pairs a "section is missing" condition with its failure message.
	checks := []struct {
		missing bool
		message string
	}{
		{narrative.Phase == "", "Generate() Phase should not be empty"},
		{narrative.Headline == "", "Generate() Headline should not be empty"},
		{narrative.Dimensions == nil, "Generate() Dimensions should not be nil"},
		{len(narrative.Actions) == 0, "Generate() Actions should not be empty"},
		{narrative.Strategy == nil, "Generate() Strategy should not be nil"},
		{narrative.Tools == nil, "Generate() Tools should not be nil"},
		{narrative.Debt == nil, "Generate() Debt should not be nil"},
		{narrative.Milestone == "", "Generate() Milestone should not be empty"},
		{narrative.WhyNow == "", "Generate() WhyNow should not be empty"},
		{narrative.StrictTarget == nil, "Generate() StrictTarget should not be nil"},
	}
	for _, check := range checks {
		if check.missing {
			t.Error(check.message)
		}
	}
}
@@ -239,7 +239,7 @@ func (d *SingleUseDetector) getFuncLOC(file string, startLine int) (int, error)
fset := token.NewFileSet()
node, err := parser.ParseFile(fset, file, nil, 0)
if err != nil {
return 0, err
return 0, fmt.Errorf("parse %s for function loc lookup: %w", file, err)
}
loc := 0
@@ -43,6 +43,21 @@ func (d *LargeFileDetector) Detect(ctx context.Context, path string, config *qua
for _, file := range files {
loc, err := countLines(file)
if err != nil {
findings = append(findings, quality.Finding{
ID: fmt.Sprintf("detector_read_error::large_file::%s", file),
Type: "detector_error",
Title: "Large file detector could not read file",
Description: fmt.Sprintf("Failed to count lines in %s: %v", filepath.Base(file), err),
File: file,
Line: 1,
Severity: quality.SeverityT2,
Score: 0,
Status: quality.StatusOpen,
Metadata: map[string]string{
"detector": "large_file",
"error": err.Error(),
},
})
continue
}
@@ -99,18 +114,21 @@ func (d *GodStructDetector) Detect(ctx context.Context, path string, config *qua
var findings []quality.Finding
for _, file := range files {
fileFindings := d.analyzeFile(file)
fileFindings, err := d.analyzeFile(file)
if err != nil {
return nil, fmt.Errorf("analyze god struct in %q: %w", file, err)
}
findings = append(findings, fileFindings...)
}
return findings, nil
}
func (d *GodStructDetector) analyzeFile(path string) []quality.Finding {
func (d *GodStructDetector) analyzeFile(path string) ([]quality.Finding, error) {
fset := token.NewFileSet()
node, err := parser.ParseFile(fset, path, nil, 0)
if err != nil {
return nil
return nil, fmt.Errorf("parse %s: %w", path, err)
}
methodCounts := make(map[string]int)
@@ -198,7 +216,7 @@ func (d *GodStructDetector) analyzeFile(path string) []quality.Finding {
}
}
return findings
return findings, nil
}
type DebugLogDetector struct {
@@ -227,22 +245,25 @@ func (d *DebugLogDetector) Detect(ctx context.Context, path string, config *qual
var findings []quality.Finding
for _, file := range files {
fileFindings := d.analyzeFile(file)
fileFindings, err := d.analyzeFile(file)
if err != nil {
return nil, fmt.Errorf("analyze debug logs in %q: %w", file, err)
}
findings = append(findings, fileFindings...)
}
return findings, nil
}
func (d *DebugLogDetector) analyzeFile(path string) []quality.Finding {
func (d *DebugLogDetector) analyzeFile(path string) ([]quality.Finding, error) {
fset := token.NewFileSet()
node, err := parser.ParseFile(fset, path, nil, 0)
if err != nil {
return nil
return nil, fmt.Errorf("parse %s: %w", path, err)
}
normPath := filepath.ToSlash(path)
if strings.Contains(normPath, "internal/ui/") || strings.Contains(normPath, "examples/") {
return nil
return nil, nil
}
debugPatterns := []string{
@@ -324,7 +345,7 @@ func (d *DebugLogDetector) analyzeFile(path string) []quality.Finding {
return true
})
return findings
return findings, nil
}
type GodFunctionDetector struct {
@@ -37,7 +37,7 @@ func (d *TestCoverageDetector) Detect(ctx context.Context, path string, config *
_, err := exec.LookPath("go")
if err != nil {
return nil, nil
return nil, fmt.Errorf("go toolchain is not available: %w", err)
}
if _, err := os.Stat(coverFile); os.IsNotExist(err) {
@@ -48,13 +48,13 @@ func (d *TestCoverageDetector) Detect(ctx context.Context, path string, config *
}
if _, err := os.Stat(coverFile); os.IsNotExist(err) {
return nil, nil
return nil, fmt.Errorf("coverage profile was not generated at %q", coverFile)
}
}
coverage, err := d.parseCoverageFile(coverFile)
if err != nil {
return nil, err
return nil, fmt.Errorf("parse coverage profile %q: %w", coverFile, err)
}
var findings []quality.Finding
@@ -210,7 +210,7 @@ func (d *UntestedFuncDetector) Detect(ctx context.Context, path string, config *
coverFile := filepath.Join(path, "coverage.out")
data, err := os.ReadFile(coverFile)
if err != nil {
return nil, nil
return nil, fmt.Errorf("read coverage profile %q: %w", coverFile, err)
}
uncoveredFuncs := make(map[string][]UncoveredFunc)
+17 -5
View File
@@ -82,8 +82,12 @@ func (p *GoPlugin) AnalyzeFile(ctx context.Context, path string, config *quality
analysis := &plugins.FileAnalysis{
Path: path,
Package: node.Name.Name,
LOC: countLOC(path),
}
loc, err := countLOC(path)
if err != nil {
return nil, fmt.Errorf("count loc for %s: %w", path, err)
}
analysis.LOC = loc
analysis.Imports = p.extractImports(node, fset)
analysis.Functions = p.extractFunctions(node, path, fset)
@@ -349,16 +353,24 @@ func (p *GoPlugin) LoadTypesInfo(ctx context.Context, path string) (*types.Info,
return pkgs[0].TypesInfo, pkgs[0].Fset, nil
}
func countLOC(path string) int {
func countLOC(path string) (int, error) {
data, err := os.ReadFile(path)
if err != nil {
return 0
return 0, fmt.Errorf("read file for loc %q: %w", path, err)
}
return strings.Count(string(data), "\n") + 1
return strings.Count(string(data), "\n") + 1, nil
}
// pluginRegistrationErr is the package-level slot read by RegistrationError.
// Its zero value is nil, which is what callers see unless something assigns it.
var pluginRegistrationErr error

// RegistrationError returns a plugin registration error captured during init, if any.
// It only reads the package-level variable; it never triggers a registration
// attempt itself.
func RegistrationError() error {
	return pluginRegistrationErr
}
func init() {
if err := plugins.Register(New()); err != nil {
panic(fmt.Sprintf("failed to register go plugin: %v", err))
pluginRegistrationErr = fmt.Errorf("register go quality plugin: %w", err)
_, _ = fmt.Fprintf(os.Stderr, "warning: %v\n", pluginRegistrationErr)
}
}
-315
View File
@@ -1,315 +0,0 @@
package review
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"time"
"github.com/yourorg/devour/internal/quality"
)
// ReviewPacket is the JSON-serializable bundle built by PacketGenerator.Generate
// and written/read by PacketGenerator.Save and PacketGenerator.Load.
type ReviewPacket struct {
	// Generated is the wall-clock time at which Generate built the packet.
	Generated time.Time `json:"generated"`
	// ProjectPath is filled from the generator's dataDir.
	ProjectPath string `json:"project_path"`
	// Language is the lang argument passed to Generate, stored verbatim.
	Language string `json:"language"`
	// Scorecard is the scorecard passed to Generate; it may be nil.
	Scorecard *quality.Scorecard `json:"scorecard"`
	// Findings holds the open findings converted by convertFindings.
	Findings []FindingReview `json:"findings"`
	// Context is the aggregate summary produced by buildContext.
	Context ReviewContext `json:"context"`
	// Questions are the prompts produced by generateQuestions.
	Questions []ReviewQuestion `json:"questions"`
}
// FindingReview is the review-facing copy of a quality.Finding. convertFindings
// copies the fields across one-to-one and adds NeedsReview and Context.
type FindingReview struct {
	ID string `json:"id"`
	Type string `json:"type"`
	Title string `json:"title"`
	Description string `json:"description"`
	File string `json:"file"`
	Line int `json:"line"`
	Severity quality.Severity `json:"severity"`
	Score int `json:"score"`
	Status quality.Status `json:"status"`
	// NeedsReview is set by convertFindings to Severity >= quality.SeverityT3.
	NeedsReview bool `json:"needs_review"`
	// Context is the canned guidance text chosen by generateContext from Type.
	Context string `json:"context"`
	// Metadata is the finding's metadata map, assigned (not copied) by
	// convertFindings, so it aliases the source finding's map.
	Metadata map[string]string `json:"metadata"`
}
// ReviewContext summarizes the finding set that a ReviewPacket was built from.
type ReviewContext struct {
	// TotalFiles and TotalLOC are not populated by buildContext in this file,
	// so they serialize as zero unless a caller sets them.
	TotalFiles int `json:"total_files"`
	TotalLOC int `json:"total_loc"`
	// FindingsByDim counts open findings per label returned by classifyDimension.
	FindingsByDim map[string]int `json:"findings_by_dimension"`
	// TopIssues holds up to five "type: title" strings for the first open
	// findings in input order.
	TopIssues []string `json:"top_issues"`
	// Trends is initialized to an empty map by buildContext and left unfilled there.
	Trends map[string]string `json:"trends"`
}
// ReviewQuestion is one prompt emitted by generateQuestions for the reviewer.
type ReviewQuestion struct {
	// ID is a fixed identifier such as "dupe_strategy" or "priority".
	ID string `json:"id"`
	// Category groups the question (e.g. "duplication", "planning").
	Category string `json:"category"`
	// Question is the human-readable prompt text.
	Question string `json:"question"`
	// Options lists the suggested answers; omitted from JSON when empty.
	Options []string `json:"options,omitempty"`
}
// PacketGenerator builds, saves and loads review packets. dataDir is the base
// directory its methods join paths onto.
type PacketGenerator struct {
	dataDir string
}

// NewPacketGenerator returns a PacketGenerator whose base directory is dataDir.
// The directory is not checked or created here.
func NewPacketGenerator(dataDir string) *PacketGenerator {
	gen := new(PacketGenerator)
	gen.dataDir = dataDir
	return gen
}
// Generate assembles a ReviewPacket from findings and scorecard.
//
// The packet is stamped with the current time, uses the generator's dataDir as
// its project path, and stores lang and scorecard verbatim. Findings, context
// and questions are derived from findings by the generator's helper methods.
// The returned error is always nil.
func (g *PacketGenerator) Generate(findings []quality.Finding, scorecard *quality.Scorecard, lang string) (*ReviewPacket, error) {
	var packet ReviewPacket

	packet.Generated = time.Now()
	packet.ProjectPath = g.dataDir
	packet.Language = lang
	packet.Scorecard = scorecard
	packet.Findings = g.convertFindings(findings)
	packet.Context = g.buildContext(findings)
	packet.Questions = g.generateQuestions(findings)

	return &packet, nil
}
// convertFindings maps every open finding to a FindingReview, preserving input
// order. Findings whose status is not quality.StatusOpen are dropped. The result
// is nil when no finding is open.
func (g *PacketGenerator) convertFindings(findings []quality.Finding) []FindingReview {
	var out []FindingReview

	for i := range findings {
		src := findings[i]
		if src.Status == quality.StatusOpen {
			out = append(out, FindingReview{
				ID:          src.ID,
				Type:        src.Type,
				Title:       src.Title,
				Description: src.Description,
				File:        src.File,
				Line:        src.Line,
				Severity:    src.Severity,
				Score:       src.Score,
				Status:      src.Status,
				NeedsReview: src.Severity >= quality.SeverityT3,
				Context:     g.generateContext(src),
				// The map is shared with the source finding, not copied.
				Metadata: src.Metadata,
			})
		}
	}

	return out
}
// generateContext returns a fixed guidance sentence chosen by the finding's
// Type. Types without a dedicated sentence get a generic prompt.
func (g *PacketGenerator) generateContext(f quality.Finding) string {
	hint := "Review this finding and decide if it needs addressing."

	switch f.Type {
	case "complexity", "complexity_ast":
		hint = "This function may be difficult to maintain. Consider if it can be simplified or broken down."
	case "duplication":
		hint = "Similar code exists elsewhere. Consider extracting common functionality."
	case "dead_code":
		hint = "This code appears unused. Verify before removing - it may be called via reflection or external tools."
	case "security":
		hint = "Potential security concern. Review carefully and consider security implications."
	case "import_cycle":
		hint = "Circular dependency detected. This can cause initialization issues and makes code harder to understand."
	}

	return hint
}
// buildContext summarizes the open findings: a per-dimension count and the
// first five open findings (input order) rendered as "type: title".
// TotalFiles and TotalLOC are left at zero and Trends is an empty map.
func (g *PacketGenerator) buildContext(findings []quality.Finding) ReviewContext {
	const maxTopIssues = 5

	summary := ReviewContext{
		FindingsByDim: make(map[string]int),
		Trends:        make(map[string]string),
	}

	for _, f := range findings {
		if f.Status != quality.StatusOpen {
			continue
		}
		summary.FindingsByDim[g.classifyDimension(f)]++
		if len(summary.TopIssues) < maxTopIssues {
			summary.TopIssues = append(summary.TopIssues, fmt.Sprintf("%s: %s", f.Type, f.Title))
		}
	}

	return summary
}
// classifyDimension maps a finding's Type to the display name of the quality
// dimension it is counted under. Unrecognized types are grouped as "Other".
func (g *PacketGenerator) classifyDimension(f quality.Finding) string {
	dimension := "Other"

	switch f.Type {
	case "complexity", "complexity_ast":
		dimension = "Code Quality"
	case "duplication":
		dimension = "Duplication"
	case "dead_code", "unused_import", "unused":
		dimension = "File Health"
	case "security":
		dimension = "Security"
	case "naming":
		dimension = "Naming Quality"
	case "import_cycle":
		dimension = "Architecture"
	}

	return dimension
}
// generateQuestions builds the reviewer prompts for a finding set.
//
// A strategy question is added for each of duplication, complexity and dead
// code that appears among the open findings, in that order. A final
// prioritization question is always appended.
func (g *PacketGenerator) generateQuestions(findings []quality.Finding) []ReviewQuestion {
	var sawDuplication, sawComplexity, sawDeadCode bool

	for i := range findings {
		if findings[i].Status != quality.StatusOpen {
			continue
		}
		switch findings[i].Type {
		case "duplication":
			sawDuplication = true
		case "complexity", "complexity_ast":
			sawComplexity = true
		case "dead_code":
			sawDeadCode = true
		}
	}

	var out []ReviewQuestion
	ask := func(id, category, question string, options ...string) {
		out = append(out, ReviewQuestion{
			ID:       id,
			Category: category,
			Question: question,
			Options:  options,
		})
	}

	if sawDuplication {
		ask("dupe_strategy", "duplication", "How should duplicated code be consolidated?",
			"Extract to shared utility",
			"Keep separate (different use cases)",
			"Refactor to common interface",
		)
	}
	if sawComplexity {
		ask("complexity_strategy", "complexity", "What's the best approach for complex functions?",
			"Break into smaller functions",
			"Introduce helper types",
			"Accept current complexity",
		)
	}
	if sawDeadCode {
		ask("dead_code_strategy", "maintenance", "Should unused code be removed?",
			"Remove if truly unused",
			"Keep for future use",
			"Mark as deprecated",
		)
	}

	// The prioritization question is asked regardless of what was found.
	ask("priority", "planning", "Which area should be prioritized for improvement?",
		"Security issues first",
		"Complexity reduction",
		"Dead code cleanup",
		"Architecture improvements",
	)

	return out
}
// Save writes packet as indented JSON to <dataDir>/review/<filename>, creating
// the review directory (mode 0755) if needed. The file is written with mode
// 0644. Directory creation, marshalling and write failures are returned wrapped.
func (g *PacketGenerator) Save(packet *ReviewPacket, filename string) error {
	dir := filepath.Join(g.dataDir, "review")
	if mkErr := os.MkdirAll(dir, 0755); mkErr != nil {
		return fmt.Errorf("failed to create review directory: %w", mkErr)
	}

	target := filepath.Join(dir, filename)

	encoded, encErr := json.MarshalIndent(packet, "", " ")
	if encErr != nil {
		return fmt.Errorf("failed to marshal packet: %w", encErr)
	}

	writeErr := os.WriteFile(target, encoded, 0644)
	if writeErr != nil {
		return fmt.Errorf("failed to write packet: %w", writeErr)
	}
	return nil
}
// Load reads <dataDir>/review/<filename> and decodes it as a ReviewPacket.
// Read and JSON decoding failures are returned wrapped; the packet is nil on
// error.
func (g *PacketGenerator) Load(filename string) (*ReviewPacket, error) {
	raw, readErr := os.ReadFile(filepath.Join(g.dataDir, "review", filename))
	if readErr != nil {
		return nil, fmt.Errorf("failed to read packet: %w", readErr)
	}

	loaded := new(ReviewPacket)
	if decodeErr := json.Unmarshal(raw, loaded); decodeErr != nil {
		return nil, fmt.Errorf("failed to parse packet: %w", decodeErr)
	}

	return loaded, nil
}
// ImportReview records reviewer responses on the persisted findings.
//
// filename names a previously saved review packet; it is loaded only to confirm
// that it exists and parses. responses maps finding IDs to the reviewer's
// answer. For every finding in <dataDir>/quality/status.json whose ID appears in
// responses, the answer is stored under Metadata["review_response"] and the
// import time (RFC 3339) under Metadata["reviewed_at"]. The file is then
// rewritten in place.
//
// Returns an error if the packet cannot be loaded, or if the findings file
// cannot be read, parsed, re-encoded or written.
func (g *PacketGenerator) ImportReview(filename string, responses map[string]string) error {
	if _, err := g.Load(filename); err != nil {
		return err
	}

	findingsPath := filepath.Join(g.dataDir, "quality", "status.json")
	data, err := os.ReadFile(findingsPath)
	if err != nil {
		return fmt.Errorf("failed to read findings: %w", err)
	}

	// NOTE(review): only the "findings" key is decoded, so any other top-level
	// keys present in status.json are dropped on rewrite - confirm this is
	// acceptable for the status file's schema.
	var state struct {
		Findings []quality.Finding `json:"findings"`
	}
	if err := json.Unmarshal(data, &state); err != nil {
		return fmt.Errorf("failed to parse findings: %w", err)
	}

	// One timestamp per import so all findings updated together agree.
	reviewedAt := time.Now().Format(time.RFC3339)

	// Iterate by index: ranging by value yields a copy, so a Metadata map
	// created for a finding that had none would be lost before the rewrite.
	for i := range state.Findings {
		finding := &state.Findings[i]
		response, ok := responses[finding.ID]
		if !ok {
			continue
		}
		if finding.Metadata == nil {
			finding.Metadata = make(map[string]string)
		}
		finding.Metadata["review_response"] = response
		finding.Metadata["reviewed_at"] = reviewedAt
	}

	updatedData, err := json.MarshalIndent(state, "", " ")
	if err != nil {
		return fmt.Errorf("failed to marshal updated findings: %w", err)
	}
	if err := os.WriteFile(findingsPath, updatedData, 0644); err != nil {
		return fmt.Errorf("failed to write updated findings: %w", err)
	}
	return nil
}
+17
View File
@@ -76,6 +76,23 @@ func (s *Scanner) Scan(ctx context.Context) (*ScanResult, error) {
findings, err := s.runDetectorSafely(ctx, detector, name)
if err != nil {
log.Printf("Detector %s failed: %v", name, err)
allFindings = append(allFindings, Finding{
ID: fmt.Sprintf("detector_error::%s", name),
Type: "detector_error",
Title: fmt.Sprintf("Detector failed: %s", name),
Description: fmt.Sprintf("Detector %s failed during scan: %v", name, err),
File: s.config.Path,
Line: 1,
Severity: SeverityT2,
Score: 0,
Status: StatusOpen,
Metadata: map[string]string{
"detector": name,
"error": err.Error(),
},
CreatedAt: time.Now(),
UpdatedAt: time.Now(),
})
continue
}
+19 -2
View File
@@ -30,7 +30,24 @@ func TestScannerRecoversDetectorPanic(t *testing.T) {
if err != nil {
t.Fatalf("scan should recover detector panic, got err: %v", err)
}
if len(result.Findings) != 1 {
t.Fatalf("expected findings from healthy detector only, got %d", len(result.Findings))
if len(result.Findings) != 2 {
t.Fatalf("expected healthy finding plus detector_error, got %d", len(result.Findings))
}
hasOK := false
hasDetectorError := false
for _, f := range result.Findings {
if f.ID == "ok" {
hasOK = true
}
if f.Type == "detector_error" {
hasDetectorError = true
}
}
if !hasOK {
t.Fatalf("expected to keep finding from healthy detector")
}
if !hasDetectorError {
t.Fatalf("expected detector_error finding for panicing detector")
}
}
+5 -2
View File
@@ -271,8 +271,11 @@ func TestScanner_Scan_WithFailingDetector(t *testing.T) {
}
// Should succeed despite failing detector
if len(result.Findings) != 0 {
t.Errorf("Scan() expected 0 findings, got %d", len(result.Findings))
if len(result.Findings) != 1 {
t.Errorf("Scan() expected 1 detector_error finding, got %d", len(result.Findings))
}
if len(result.Findings) == 1 && result.Findings[0].Type != "detector_error" {
t.Errorf("Scan() expected detector_error finding, got %q", result.Findings[0].Type)
}
}
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -70,28 +69,7 @@ func (s *AstroDocsScraper) DetectChanges(ctx context.Context, source *Source, la
}
func (s *AstroDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *AstroDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -75,28 +74,7 @@ func (s *CloudflareDocsScraper) DetectChanges(ctx context.Context, source *Sourc
}
func (s *CloudflareDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *CloudflareDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -70,28 +69,7 @@ func (s *DockerDocsScraper) DetectChanges(ctx context.Context, source *Source, l
}
func (s *DockerDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *DockerDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -6,7 +6,6 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -91,28 +90,7 @@ func (s *GoDocsScraper) DetectChanges(ctx context.Context, source *Source, lastH
}
func (s *GoDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *GoDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -85,28 +84,7 @@ func (s *JavaDocsScraper) DetectChanges(ctx context.Context, source *Source, las
}
func (s *JavaDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *JavaDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -80,28 +79,7 @@ func (s *MCPDocsScraper) DetectChanges(ctx context.Context, source *Source, last
}
func (s *MCPDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *MCPDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -90,28 +89,7 @@ func (s *NuxtDocsScraper) DetectChanges(ctx context.Context, source *Source, las
}
func (s *NuxtDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *NuxtDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -100,28 +99,7 @@ func (s *PythonDocsScraper) DetectChanges(ctx context.Context, source *Source, l
}
func (s *PythonDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *PythonDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -80,28 +79,7 @@ func (s *ReactDocsScraper) DetectChanges(ctx context.Context, source *Source, la
}
func (s *ReactDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *ReactDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -6,7 +6,6 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -106,28 +105,7 @@ func (s *RustDocsScraper) DetectChanges(ctx context.Context, source *Source, las
}
func (s *RustDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *RustDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -80,28 +79,7 @@ func (s *SpringDocsScraper) DetectChanges(ctx context.Context, source *Source, l
}
func (s *SpringDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *SpringDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -85,28 +84,7 @@ func (s *TSDocsScraper) DetectChanges(ctx context.Context, source *Source, lastH
}
func (s *TSDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *TSDocsScraper) generateHash(content string) string {
+65
View File
@@ -1,8 +1,14 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"net/url"
"strings"
basescraper "github.com/yourorg/devour/internal/scraper"
)
@@ -19,3 +25,62 @@ func generateDocID(urlStr string) string {
hash := sha256.Sum256([]byte(urlStr))
return hex.EncodeToString(hash[:12])
}
func fetchExternalPage(ctx context.Context, client *http.Client, userAgent, targetURL string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", targetURL, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", userAgent)
resp, err := client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
snippet, readErr := readErrorSnippet(resp.Body)
if readErr != nil {
return "", fmt.Errorf("GET %s returned HTTP %d and body read failed: %w", summarizeURL(targetURL), resp.StatusCode, readErr)
}
return "", fmt.Errorf("GET %s returned HTTP %d: %s", summarizeURL(targetURL), resp.StatusCode, snippet)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
}
// readErrorSnippet reads at most 512 bytes from body and returns them with
// surrounding whitespace trimmed. If nothing but whitespace was read it returns
// the placeholder "<empty body>". A read failure is returned as-is.
func readErrorSnippet(body io.Reader) (string, error) {
	const maxErrorBodyBytes = 512

	limited := io.LimitReader(body, maxErrorBodyBytes)
	raw, readErr := io.ReadAll(limited)
	if readErr != nil {
		return "", readErr
	}

	if trimmed := strings.TrimSpace(string(raw)); trimmed != "" {
		return trimmed, nil
	}
	return "<empty body>", nil
}
// summarizeURL reduces rawURL to "host/path" for use in messages, dropping the
// scheme, userinfo, query and fragment. An empty path is rendered as "/". If
// rawURL does not parse or has no host, it is returned unchanged.
func summarizeURL(rawURL string) string {
	u, parseErr := url.Parse(rawURL)
	if parseErr != nil {
		return rawURL
	}
	if u.Host == "" {
		return rawURL
	}

	if escaped := u.EscapedPath(); escaped != "" {
		return u.Host + escaped
	}
	return u.Host + "/"
}
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -85,28 +84,7 @@ func (s *VueDocsScraper) DetectChanges(ctx context.Context, source *Source, last
}
func (s *VueDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *VueDocsScraper) generateHash(content string) string {
+7 -7
View File
@@ -28,12 +28,12 @@ func (s *GitHubScraper) Scrape(ctx context.Context, source *Source) ([]*Document
repoURL, repoName, err := s.resolveRepo(source)
if err != nil {
return nil, err
return nil, fmt.Errorf("resolve github repository: %w", err)
}
tmpDir, err := os.MkdirTemp("", "devour-github-*")
if err != nil {
return nil, err
return nil, fmt.Errorf("create temporary clone directory: %w", err)
}
defer os.RemoveAll(tmpDir)
@@ -81,14 +81,14 @@ func (s *GitHubScraper) Scrape(ctx context.Context, source *Source) ([]*Document
local := NewLocalScraper(s.config)
docs, err := local.Scrape(ctx, localSource)
if err != nil {
return nil, err
return nil, fmt.Errorf("scrape repository docs: %w", err)
}
if len(docs) == 0 && len(source.Include) == 0 {
// Sparse patterns did not match this repository layout; retry full checkout.
_ = exec.CommandContext(ctx, "git", "-C", tmpDir, "sparse-checkout", "disable").Run()
docs, err = local.Scrape(ctx, localSource)
if err != nil {
return nil, err
return nil, fmt.Errorf("scrape repository docs after sparse fallback: %w", err)
}
}
@@ -129,7 +129,7 @@ func (s *GitHubScraper) DetectChanges(ctx context.Context, source *Source, lastH
}
_, repoName, err := s.resolveRepo(source)
if err != nil {
return false, "", err
return false, "", fmt.Errorf("resolve github repository: %w", err)
}
remote := "https://github.com/" + strings.TrimSuffix(repoName, ".git") + ".git"
@@ -141,7 +141,7 @@ func (s *GitHubScraper) DetectChanges(ctx context.Context, source *Source, lastH
cmd := exec.CommandContext(ctx, "git", "ls-remote", remote, branch)
output, err := cmd.Output()
if err != nil {
return false, "", err
return false, "", fmt.Errorf("run git ls-remote for %s (%s): %w", remote, branch, err)
}
line := strings.TrimSpace(string(output))
if line == "" {
@@ -169,7 +169,7 @@ func (s *GitHubScraper) resolveRepo(source *Source) (repoURL string, repoName st
u, err := url.Parse(raw)
if err != nil {
return "", "", err
return "", "", fmt.Errorf("parse github url %q: %w", raw, err)
}
if !strings.Contains(strings.ToLower(u.Host), "github.com") {
return "", "", fmt.Errorf("not a github url: %s", raw)
+19 -7
View File
@@ -4,8 +4,10 @@ import (
"context"
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"io/fs"
"log"
"os"
"path/filepath"
"regexp"
@@ -44,14 +46,15 @@ func (s *LocalScraper) Scrape(ctx context.Context, source *Source) ([]*Document,
info, err := os.Stat(root)
if err != nil {
return nil, err
return nil, fmt.Errorf("stat local source root %q: %w", root, err)
}
docs := make([]*Document, 0)
nonFatalErrors := make([]error, 0)
if !info.IsDir() {
doc, err := s.fileToDocument(root, source)
if err != nil {
return nil, err
return nil, fmt.Errorf("convert local source file %q: %w", root, err)
}
return []*Document{doc}, nil
}
@@ -89,13 +92,22 @@ func (s *LocalScraper) Scrape(ctx context.Context, source *Source) ([]*Document,
doc, err := s.fileToDocument(path, source)
if err != nil {
if len(nonFatalErrors) < 20 {
nonFatalErrors = append(nonFatalErrors, fmt.Errorf("%s: %w", path, err))
}
return nil
}
docs = append(docs, doc)
return nil
})
if err != nil {
return nil, err
return nil, fmt.Errorf("walk local source root %q: %w", root, err)
}
if len(nonFatalErrors) > 0 {
log.Printf("local scraper skipped %d files due to conversion errors (sample: %v)", len(nonFatalErrors), nonFatalErrors[0])
if len(docs) == 0 {
return nil, fmt.Errorf("local scrape failed while converting files: %w", errors.Join(nonFatalErrors...))
}
}
return docs, nil
@@ -118,7 +130,7 @@ func (s *LocalScraper) DetectChanges(ctx context.Context, source *Source, lastHa
h := sha256.New()
err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
return fmt.Errorf("walk local source path %q: %w", path, err)
}
if d.IsDir() {
name := d.Name()
@@ -133,13 +145,13 @@ func (s *LocalScraper) DetectChanges(ctx context.Context, source *Source, lastHa
info, infoErr := d.Info()
if infoErr != nil {
return infoErr
return fmt.Errorf("stat local source file %q: %w", path, infoErr)
}
fmt.Fprintf(h, "%s|%d|%d\n", path, info.Size(), info.ModTime().UnixNano())
return nil
})
if err != nil {
return false, "", err
return false, "", fmt.Errorf("walk local source root %q for change detection: %w", root, err)
}
hash := hex.EncodeToString(h.Sum(nil))
@@ -149,7 +161,7 @@ func (s *LocalScraper) DetectChanges(ctx context.Context, source *Source, lastHa
func (s *LocalScraper) fileToDocument(path string, source *Source) (*Document, error) {
b, err := os.ReadFile(path)
if err != nil {
return nil, err
return nil, fmt.Errorf("read local source file %q: %w", path, err)
}
ext := strings.ToLower(filepath.Ext(path))
+4 -3
View File
@@ -5,6 +5,7 @@ import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
@@ -81,7 +82,7 @@ func (s *LocalSearchScraper) Scrape(ctx context.Context, source *Source) ([]*Doc
docs := make([]*Document, 0, limit)
seen := make(map[string]bool)
var scrapeErrors []string
var scrapeErrors []error
for i, result := range results {
if ctx.Err() != nil {
@@ -109,7 +110,7 @@ func (s *LocalSearchScraper) Scrape(ctx context.Context, source *Source) ([]*Doc
})
if err != nil {
if len(scrapeErrors) < 20 {
scrapeErrors = append(scrapeErrors, fmt.Sprintf("%s: %v", resultURL, err))
scrapeErrors = append(scrapeErrors, fmt.Errorf("%s: %w", resultURL, err))
}
continue
}
@@ -140,7 +141,7 @@ func (s *LocalSearchScraper) Scrape(ctx context.Context, source *Source) ([]*Doc
if len(docs) == 0 {
if len(scrapeErrors) > 0 {
return nil, fmt.Errorf("local search returned results but page scraping failed: %s", strings.Join(scrapeErrors, "; "))
return nil, fmt.Errorf("local search returned results but page scraping failed: %w", errors.Join(scrapeErrors...))
}
return nil, fmt.Errorf("local search yielded no usable results for query %q", query)
}
+7 -7
View File
@@ -42,12 +42,12 @@ func (s *OpenAPIScraper) Scrape(ctx context.Context, source *Source) ([]*Documen
raw, specURL, err := s.readSpec(ctx, source)
if err != nil {
return nil, err
return nil, fmt.Errorf("read openapi spec: %w", err)
}
spec, err := parseOpenAPISpec(raw)
if err != nil {
return nil, err
return nil, fmt.Errorf("parse openapi spec %q: %w", specURL, err)
}
docs := make([]*Document, 0)
@@ -138,7 +138,7 @@ func (s *OpenAPIScraper) readSpec(ctx context.Context, source *Source) ([]byte,
if strings.HasPrefix(rawPath, "http://") || strings.HasPrefix(rawPath, "https://") {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawPath, nil)
if err != nil {
return nil, "", err
return nil, "", fmt.Errorf("build openapi fetch request: %w", err)
}
if s.config != nil && strings.TrimSpace(s.config.UserAgent) != "" {
req.Header.Set("User-Agent", s.config.UserAgent)
@@ -146,7 +146,7 @@ func (s *OpenAPIScraper) readSpec(ctx context.Context, source *Source) ([]byte,
resp, err := s.client.Do(req)
if err != nil {
return nil, "", err
return nil, "", fmt.Errorf("fetch openapi spec from %s: %w", rawPath, err)
}
defer resp.Body.Close()
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
@@ -154,14 +154,14 @@ func (s *OpenAPIScraper) readSpec(ctx context.Context, source *Source) ([]byte,
}
body, err := io.ReadAll(io.LimitReader(resp.Body, 10<<20))
if err != nil {
return nil, "", err
return nil, "", fmt.Errorf("read openapi response body from %s: %w", rawPath, err)
}
return body, rawPath, nil
}
b, err := os.ReadFile(rawPath)
if err != nil {
return nil, "", err
return nil, "", fmt.Errorf("read openapi file %q: %w", rawPath, err)
}
return b, "file://" + rawPath, nil
}
@@ -214,7 +214,7 @@ func parseOpenAPISpec(raw []byte) (*openAPISpec, error) {
var spec openAPISpec
if err := json.Unmarshal(raw, &spec); err != nil {
if yamlErr := yaml.Unmarshal(raw, &spec); yamlErr != nil {
return nil, fmt.Errorf("invalid openapi content: %w", err)
return nil, fmt.Errorf("invalid openapi content (json: %v; yaml: %w)", err, yamlErr)
}
}
+5 -4
View File
@@ -4,6 +4,7 @@ import (
"context"
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"net/url"
"path"
@@ -32,7 +33,7 @@ func (s *WebScraper) Scrape(ctx context.Context, source *Source) ([]*Document, e
scheduled := make(map[string]bool)
contentHashes := make(map[string]bool)
var mu sync.Mutex
var scrapeErrors []string
var scrapeErrors []error
// Parse base URL for domain restrictions
baseURL, err := url.Parse(source.URL)
@@ -102,7 +103,7 @@ func (s *WebScraper) Scrape(ctx context.Context, source *Source) ([]*Document, e
}
mu.Lock()
if len(scrapeErrors) < 20 {
scrapeErrors = append(scrapeErrors, fmt.Sprintf("%s: %v", reqURL, err))
scrapeErrors = append(scrapeErrors, fmt.Errorf("%s: %w", reqURL, err))
}
mu.Unlock()
})
@@ -236,7 +237,7 @@ func (s *WebScraper) Scrape(ctx context.Context, source *Source) ([]*Document, e
}
mu.Lock()
if len(scrapeErrors) < 20 {
scrapeErrors = append(scrapeErrors, fmt.Sprintf("%s: %v", absoluteURL, err))
scrapeErrors = append(scrapeErrors, fmt.Errorf("%s: %w", absoluteURL, err))
}
mu.Unlock()
}
@@ -256,7 +257,7 @@ func (s *WebScraper) Scrape(ctx context.Context, source *Source) ([]*Document, e
if len(documents) == 0 {
if len(scrapeErrors) > 0 {
return nil, fmt.Errorf("web scrape failed: %s", strings.Join(scrapeErrors, "; "))
return nil, fmt.Errorf("web scrape failed: %w", errors.Join(scrapeErrors...))
}
return nil, fmt.Errorf("web scrape extracted no documents from %s", source.URL)
}
+42 -19
View File
@@ -5,8 +5,10 @@ import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io/fs"
"log"
"os"
"path/filepath"
"sort"
@@ -111,18 +113,19 @@ func (e *Engine) Rebuild(ctx context.Context) (*IndexStats, error) {
return nil, fmt.Errorf("docs directory is required")
}
if err := os.MkdirAll(e.IndexDir, 0o755); err != nil {
return nil, err
return nil, fmt.Errorf("create index dir %q: %w", e.IndexDir, err)
}
if err := os.MkdirAll(e.MetadataDir, 0o755); err != nil {
return nil, err
return nil, fmt.Errorf("create metadata dir %q: %w", e.MetadataDir, err)
}
docFiles, sourceHash, err := e.listDocFiles()
if err != nil {
return nil, err
return nil, fmt.Errorf("list docs for rebuild: %w", err)
}
docs := make([]indexedDoc, 0, len(docFiles))
parseErrors := make([]error, 0)
tokenCount := 0
for _, file := range docFiles {
select {
@@ -133,6 +136,9 @@ func (e *Engine) Rebuild(ctx context.Context) (*IndexStats, error) {
rd, err := parseDocFile(file)
if err != nil {
if len(parseErrors) < 20 {
parseErrors = append(parseErrors, fmt.Errorf("%s: %w", file, err))
}
continue
}
if strings.TrimSpace(rd.Content) == "" {
@@ -163,11 +169,17 @@ func (e *Engine) Rebuild(ctx context.Context) (*IndexStats, error) {
Length: length,
})
}
if len(parseErrors) > 0 {
log.Printf("search rebuild skipped %d files due to parse/read errors (sample: %v)", len(parseErrors), parseErrors[0])
if len(docFiles) > 0 && len(docs) == 0 {
return nil, fmt.Errorf("rebuild produced no indexable docs after parse failures: %w", errors.Join(parseErrors...))
}
}
index := persistedIndex{Version: indexVersion, BuiltAt: time.Now(), Docs: docs}
indexPath := filepath.Join(e.IndexDir, indexFileName)
if err := writeJSON(indexPath, index); err != nil {
return nil, err
return nil, fmt.Errorf("write lexical index: %w", err)
}
meta := persistedMeta{
@@ -179,7 +191,7 @@ func (e *Engine) Rebuild(ctx context.Context) (*IndexStats, error) {
}
metaPath := filepath.Join(e.MetadataDir, metaFileName)
if err := writeJSON(metaPath, meta); err != nil {
return nil, err
return nil, fmt.Errorf("write lexical metadata: %w", err)
}
return &IndexStats{
@@ -199,20 +211,28 @@ func (e *Engine) EnsureIndexed(ctx context.Context) (*IndexStats, error) {
if os.IsNotExist(err) {
return e.Rebuild(ctx)
}
return nil, err
return nil, fmt.Errorf("read index metadata %q: %w", metaPath, err)
}
var meta persistedMeta
if err := json.Unmarshal(b, &meta); err != nil {
return e.Rebuild(ctx)
stats, rebuildErr := e.Rebuild(ctx)
if rebuildErr != nil {
return nil, fmt.Errorf("rebuild after invalid metadata %q: %w", metaPath, rebuildErr)
}
return stats, nil
}
_, sourceHash, err := e.listDocFiles()
if err != nil {
return nil, err
return nil, fmt.Errorf("list docs for metadata check: %w", err)
}
if sourceHash != meta.SourceFileHash {
return e.Rebuild(ctx)
stats, rebuildErr := e.Rebuild(ctx)
if rebuildErr != nil {
return nil, fmt.Errorf("rebuild after source hash change: %w", rebuildErr)
}
return stats, nil
}
return &IndexStats{
@@ -232,18 +252,18 @@ func (e *Engine) Search(ctx context.Context, query string, opts SearchOptions) (
stats, err := e.EnsureIndexed(ctx)
if err != nil {
return nil, nil, err
return nil, nil, fmt.Errorf("ensure lexical index: %w", err)
}
indexPath := filepath.Join(e.IndexDir, indexFileName)
b, err := os.ReadFile(indexPath)
if err != nil {
return nil, nil, err
return nil, nil, fmt.Errorf("read lexical index %q: %w", indexPath, err)
}
var idx persistedIndex
if err := json.Unmarshal(b, &idx); err != nil {
return nil, nil, err
return nil, nil, fmt.Errorf("decode lexical index %q: %w", indexPath, err)
}
limit := opts.Limit
@@ -270,7 +290,7 @@ func (e *Engine) Search(ctx context.Context, query string, opts SearchOptions) (
for _, doc := range idx.Docs {
select {
case <-ctx.Done():
return nil, nil, ctx.Err()
return nil, nil, fmt.Errorf("search canceled: %w", ctx.Err())
default:
}
score := lexicalScore(qFreq, queryTokens, doc)
@@ -322,7 +342,7 @@ func (e *Engine) listDocFiles() ([]string, string, error) {
err := filepath.WalkDir(e.DocsDir, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
return fmt.Errorf("walk docs entry %q: %w", path, err)
}
if d.IsDir() {
return nil
@@ -336,7 +356,7 @@ func (e *Engine) listDocFiles() ([]string, string, error) {
info, statErr := d.Info()
if statErr != nil {
return statErr
return fmt.Errorf("stat docs file %q: %w", path, statErr)
}
files = append(files, path)
fmt.Fprintf(h, "%s|%d|%d\n", path, info.Size(), info.ModTime().UnixNano())
@@ -346,7 +366,7 @@ func (e *Engine) listDocFiles() ([]string, string, error) {
if os.IsNotExist(err) {
return []string{}, hashString("empty"), nil
}
return nil, "", err
return nil, "", fmt.Errorf("walk docs dir %q: %w", e.DocsDir, err)
}
sort.Strings(files)
@@ -356,7 +376,7 @@ func (e *Engine) listDocFiles() ([]string, string, error) {
func parseDocFile(path string) (*rawDoc, error) {
b, err := os.ReadFile(path)
if err != nil {
return nil, err
return nil, fmt.Errorf("read doc file %q: %w", path, err)
}
ext := strings.ToLower(filepath.Ext(path))
switch ext {
@@ -393,9 +413,12 @@ func markdownTitle(content string) string {
// writeJSON marshals v as indented JSON and writes the bytes to path with
// file mode 0o644.
//
// NOTE(review): this span is a diff rendering with the +/- markers stripped,
// so pre-change and post-change lines appear back to back. Going by the hunk
// header (-393,9 +413,12), the bare `return err` and the bare
// `return os.WriteFile(...)` look like the removed lines; the post-change
// version wraps the marshal failure as "marshal json payload: ..." and the
// write failure as "write json file <path>: ..." — confirm against the real
// file before relying on this.
func writeJSON(path string, v any) error {
b, err := json.MarshalIndent(v, "", " ")
if err != nil {
return err
return fmt.Errorf("marshal json payload: %w", err)
}
return os.WriteFile(path, b, 0o644)
if err := os.WriteFile(path, b, 0o644); err != nil {
return fmt.Errorf("write json file %q: %w", path, err)
}
return nil
}
func tokenize(input string) []string {
+27 -9
View File
@@ -7,6 +7,7 @@ import (
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"os"
"strings"
@@ -129,11 +130,19 @@ func (s *HTTPServer) Start(ctx context.Context) error {
defer r.Body.Close()
var req rpcRequest
if err := json.NewDecoder(io.LimitReader(r.Body, 2<<20)).Decode(&req); err != nil {
writeRPC(w, rpcResponse{JSONRPC: "2.0", Error: &rpcError{Code: -32700, Message: "parse error"}})
if writeErr := writeRPC(w, rpcResponse{JSONRPC: "2.0", Error: &rpcError{Code: -32700, Message: "parse error"}}); writeErr != nil {
wrapped := wrapTransportError("http", "encode parse-error response", writeErr)
log.Printf("%v", wrapped)
http.Error(w, wrapped.Error(), http.StatusInternalServerError)
}
return
}
resp := s.handleRPC(r.Context(), req)
writeRPC(w, resp)
if err := writeRPC(w, resp); err != nil {
wrapped := wrapTransportError("http", "encode rpc response", err)
log.Printf("%v", wrapped)
http.Error(w, wrapped.Error(), http.StatusInternalServerError)
}
})
host := s.config.Host
@@ -156,10 +165,10 @@ func (s *HTTPServer) Start(ctx context.Context) error {
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
_ = s.http.Shutdown(shutdownCtx)
return ctx.Err()
return wrapTransportError("http", "server context canceled", ctx.Err())
case err := <-errCh:
if err != nil && err != http.ErrServerClosed {
return err
return wrapTransportError("http", "listen and serve", err)
}
return nil
}
@@ -210,17 +219,19 @@ func (s *StdioServer) Start(ctx context.Context) error {
var req rpcRequest
if err := json.Unmarshal([]byte(line), &req); err != nil {
_ = out.Encode(rpcResponse{JSONRPC: "2.0", Error: &rpcError{Code: -32700, Message: "parse error"}})
if encodeErr := out.Encode(rpcResponse{JSONRPC: "2.0", Error: &rpcError{Code: -32700, Message: "parse error"}}); encodeErr != nil {
return wrapTransportError("stdio", "encode parse-error response", encodeErr)
}
continue
}
resp := handleRPC(ctx, s.config.Handler, req)
if err := out.Encode(resp); err != nil {
return err
return wrapTransportError("stdio", "encode rpc response", err)
}
}
if err := scanner.Err(); err != nil {
return err
return wrapTransportError("stdio", "scan stdin", err)
}
return nil
}
@@ -247,10 +258,17 @@ func handleRPC(ctx context.Context, handler MethodHandler, req rpcRequest) rpcRe
return rpcResponse{JSONRPC: "2.0", ID: req.ID, Result: result}
}
// writeRPC writes payload to w as a JSON response: it sets the Content-Type
// header to application/json, sends HTTP 400 when payload.Error is non-nil,
// and then JSON-encodes payload into the response body.
//
// NOTE(review): this span is a diff rendering with the +/- markers stripped.
// Going by the hunk header (-247,10 +258,17), the first signature (no return
// value) and the `_ = json.NewEncoder(w).Encode(payload)` line look like the
// removed lines; the post-change version returns an encode failure wrapped as
// "encode rpc response: ..." — confirm against the real file.
func writeRPC(w http.ResponseWriter, payload rpcResponse) {
func writeRPC(w http.ResponseWriter, payload rpcResponse) error {
w.Header().Set("Content-Type", "application/json")
if payload.Error != nil {
// The 400 status is committed here, before encoding starts, so a caller
// cannot replace it with a different status after an encode failure.
w.WriteHeader(http.StatusBadRequest)
}
_ = json.NewEncoder(w).Encode(payload)
if err := json.NewEncoder(w).Encode(payload); err != nil {
return fmt.Errorf("encode rpc response: %w", err)
}
return nil
}
// wrapTransportError annotates err with the transport name and the operation
// that failed, producing "<transport> rpc <operation> failed: <err>". The
// original error is wrapped with %w, so errors.Is and errors.As still match it
// through the returned value.
//
// A nil err yields nil: without this guard fmt.Errorf would return a non-nil
// error whose text contains "%!w(<nil>)", turning "no error" into a failure.
func wrapTransportError(transport, operation string, err error) error {
	if err == nil {
		return nil
	}
	return fmt.Errorf("%s rpc %s failed: %w", transport, operation, err)
}