first commit

This commit is contained in:
Tomas Dvorak
2026-02-22 10:42:17 +01:00
commit 55885a0e8f
239 changed files with 103690 additions and 0 deletions
+38
View File
@@ -0,0 +1,38 @@
// Package ai provides AI integration for embeddings and context injection.
package ai
import (
"context"
)
// Config holds AI configuration.
//
// Every field carries a yaml tag, so the struct maps onto a YAML document
// using the listed keys.
type Config struct {
	// Provider selects the implementation in NewClient: "openai" or "mock";
	// any other value falls back to a mock client.
	Provider string `yaml:"provider"`
	// Model is the model name sent to the provider; the OpenAI client
	// substitutes its own defaults when it is empty.
	Model string `yaml:"model"`
	// Dimensions is the embedding vector size.
	Dimensions int `yaml:"dimensions"`
	// APIKey is the provider credential; NewOpenAIClient falls back to the
	// OPENAI_API_KEY environment variable when it is empty.
	APIKey string `yaml:"api_key"`
	// BatchSize is the number of texts sent per embeddings request.
	BatchSize int `yaml:"batch_size"`
	// BaseURL is the API root; NewOpenAIClient defaults it to
	// https://api.openai.com/v1 when empty.
	BaseURL string `yaml:"base_url"`
	// Temperature is forwarded to chat requests only when greater than zero.
	Temperature float64 `yaml:"temperature"`
}
// Client provides AI operations.
//
// OpenAIClient and MockClient in this package both satisfy it.
type Client interface {
	// Embed generates one embedding vector per entry of texts.
	Embed(ctx context.Context, texts []string) ([][]float32, error)
	// QueryWithContext generates a response to query, injecting contextDocs
	// as supporting context. The parameter is deliberately not called
	// "context" so it does not shadow the imported context package.
	QueryWithContext(ctx context.Context, query string, contextDocs []string) (string, error)
}
// NewClient returns the Client implementation selected by config.Provider:
// "openai" yields an OpenAI-backed client, "mock" a mock client sized by
// config.Dimensions, and any other value a mock client with 1536 dimensions.
func NewClient(config *Config) Client {
	provider := config.Provider
	if provider == "openai" {
		return NewOpenAIClient(config)
	}
	if provider == "mock" {
		return NewMockClient(config.Dimensions)
	}
	// Unknown or empty provider: fall back to a fixed-size mock.
	return NewMockClient(1536)
}
+298
View File
@@ -0,0 +1,298 @@
// Package ai provides AI integration for embeddings and context injection.
package ai
import (
"context"
"encoding/json"
"fmt"
"net/http"
"os"
"strings"
"time"
)
// OpenAIClient implements AI operations using OpenAI API.
//
// config is the private, defaults-resolved copy built by NewOpenAIClient;
// httpClient is the HTTP client used for every request the client makes.
type OpenAIClient struct {
	config *Config
	httpClient *http.Client
}
// NewOpenAIClient creates a new OpenAI client.
//
// The client keeps its own copy of config, so later changes to the caller's
// value do not affect it. Defaults are resolved on that copy:
//   - an empty APIKey falls back to the OPENAI_API_KEY environment variable;
//   - trailing slashes are trimmed from BaseURL so that joining it with
//     "/embeddings" or "/chat/completions" never yields a "//" path;
//   - an empty BaseURL becomes "https://api.openai.com/v1".
//
// Requests made through the returned client time out after 60 seconds.
func NewOpenAIClient(config *Config) *OpenAIClient {
	resolved := *config

	if resolved.APIKey == "" {
		resolved.APIKey = os.Getenv("OPENAI_API_KEY")
	}

	resolved.BaseURL = strings.TrimRight(resolved.BaseURL, "/")
	if resolved.BaseURL == "" {
		resolved.BaseURL = "https://api.openai.com/v1"
	}

	return &OpenAIClient{
		config:     &resolved,
		httpClient: &http.Client{Timeout: 60 * time.Second},
	}
}
// EmbeddingRequest represents an embedding API request.
//
// Dimensions is tagged omitempty, so a zero value is left out of the JSON
// body entirely.
type EmbeddingRequest struct {
	Model string `json:"model"`
	Input []string `json:"input"`
	Dimensions int `json:"dimensions,omitempty"`
}
// EmbeddingResponse represents an embedding API response.
//
// Each Data entry carries the Index of the input it belongs to; embedBatch
// uses that index to place the vector in its result slice. Error is non-nil
// only when the decoded body contains an "error" object.
type EmbeddingResponse struct {
	Object string `json:"object"`
	Data []struct {
		Object string `json:"object"`
		Index int `json:"index"`
		Embedding []float32 `json:"embedding"`
	} `json:"data"`
	Model string `json:"model"`
	Usage struct {
		PromptTokens int `json:"prompt_tokens"`
		TotalTokens int `json:"total_tokens"`
	} `json:"usage"`
	Error *APIError `json:"error,omitempty"`
}
// APIError represents an API error.
//
// It is decoded from the "error" object of a response body and is returned
// directly as an error value by the client methods.
type APIError struct {
	Message string `json:"message"`
	Type    string `json:"type"`
	Code    string `json:"code"`
}

// Error returns the API's message. When the message is empty it falls back
// to the error type and/or code so the resulting error string is never
// blank, and it tolerates a nil receiver.
func (e *APIError) Error() string {
	if e == nil {
		return "<nil>"
	}
	if e.Message != "" {
		return e.Message
	}
	switch {
	case e.Type != "" && e.Code != "":
		return "API error (type=" + e.Type + ", code=" + e.Code + ")"
	case e.Type != "":
		return "API error (type=" + e.Type + ")"
	case e.Code != "":
		return "API error (code=" + e.Code + ")"
	default:
		return "unknown API error"
	}
}
// Embed generates embeddings for texts.
//
// The inputs are sent in batches of config.BatchSize (100 when the configured
// value is zero or negative) and the per-batch results are concatenated in
// input order. The model defaults to "text-embedding-3-small" when none is
// configured. An error is returned when no API key is configured or when any
// batch fails; in that case no partial result is returned.
func (c *OpenAIClient) Embed(ctx context.Context, texts []string) ([][]float32, error) {
	if c.config.APIKey == "" {
		return nil, fmt.Errorf("OpenAI API key not configured")
	}
	if len(texts) == 0 {
		// Nothing to embed; keep the nil result callers got before.
		return nil, nil
	}

	model := c.config.Model
	if model == "" {
		model = "text-embedding-3-small"
	}

	// A non-positive batch size would make the slice bounds below negative
	// and panic, so treat it like "unset".
	batchSize := c.config.BatchSize
	if batchSize <= 0 {
		batchSize = 100
	}

	allEmbeddings := make([][]float32, 0, len(texts))
	for start := 0; start < len(texts); start += batchSize {
		end := start + batchSize
		if end > len(texts) {
			end = len(texts)
		}

		embeddings, err := c.embedBatch(ctx, model, texts[start:end])
		if err != nil {
			return nil, err
		}
		allEmbeddings = append(allEmbeddings, embeddings...)
	}

	return allEmbeddings, nil
}
// embedBatch processes a single batch of texts.
//
// It POSTs the batch to <BaseURL>/embeddings and returns one vector per
// input, ordered by the index reported in the response. Errors are returned
// when the request cannot be built or sent, when the API reports an error
// object, when the HTTP status is not 2xx, when the body cannot be decoded,
// when a response index falls outside the batch, or when an input received
// no embedding at all.
func (c *OpenAIClient) embedBatch(ctx context.Context, model string, texts []string) ([][]float32, error) {
	req := EmbeddingRequest{
		Model: model,
		Input: texts,
	}

	// Set dimensions if specified (for text-embedding-3 models)
	if c.config.Dimensions > 0 && strings.HasPrefix(model, "text-embedding-3") {
		req.Dimensions = c.config.Dimensions
	}

	body, err := json.Marshal(req)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal request: %w", err)
	}

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.config.BaseURL+"/embeddings", strings.NewReader(string(body)))
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")
	httpReq.Header.Set("Authorization", "Bearer "+c.config.APIKey)

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("request failed: %w", err)
	}
	defer resp.Body.Close()

	var embeddingResp EmbeddingResponse
	decodeErr := json.NewDecoder(resp.Body).Decode(&embeddingResp)

	// Prefer the API's own error object when the body carried one.
	if decodeErr == nil && embeddingResp.Error != nil {
		return nil, embeddingResp.Error
	}
	// A non-2xx status without a decodable error object (for example an HTML
	// page from a proxy) is reported by status rather than as a decode error.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, fmt.Errorf("embeddings request failed: unexpected status %s", resp.Status)
	}
	if decodeErr != nil {
		return nil, fmt.Errorf("failed to decode response: %w", decodeErr)
	}

	// Place each embedding at the position of the input it belongs to.
	embeddings := make([][]float32, len(texts))
	for _, data := range embeddingResp.Data {
		if data.Index < 0 || data.Index >= len(embeddings) {
			return nil, fmt.Errorf("embedding index %d out of range for batch of %d", data.Index, len(texts))
		}
		embeddings[data.Index] = data.Embedding
	}
	for idx, embedding := range embeddings {
		if embedding == nil {
			return nil, fmt.Errorf("no embedding returned for input %d", idx)
		}
	}

	return embeddings, nil
}
// ChatRequest represents a chat completion request.
//
// Temperature and MaxTokens are tagged omitempty, so zero values are left
// out of the JSON body.
type ChatRequest struct {
	Model string `json:"model"`
	Messages []ChatMessage `json:"messages"`
	Temperature float64 `json:"temperature,omitempty"`
	MaxTokens int `json:"max_tokens,omitempty"`
}
// ChatMessage represents a chat message.
//
// QueryWithContext builds messages with the roles "system" and "user".
type ChatMessage struct {
	Role string `json:"role"`
	Content string `json:"content"`
}
// ChatResponse represents a chat completion response.
//
// QueryWithContext returns the content of the first entry in Choices. Error
// is non-nil only when the decoded body contains an "error" object.
type ChatResponse struct {
	ID string `json:"id"`
	Object string `json:"object"`
	Created int64 `json:"created"`
	Model string `json:"model"`
	Choices []struct {
		Index int `json:"index"`
		Message ChatMessage `json:"message"`
		Finish string `json:"finish_reason"`
	} `json:"choices"`
	Usage struct {
		PromptTokens int `json:"prompt_tokens"`
		CompletionTokens int `json:"completion_tokens"`
		TotalTokens int `json:"total_tokens"`
	} `json:"usage"`
	Error *APIError `json:"error,omitempty"`
}
// QueryWithContext generates a response with context injection.
//
// contextDocs are joined with a "---" separator and placed ahead of the
// question in a single user message; a fixed system message instructs the
// model to answer from that context. The configured model is used unless it
// is empty or its name contains "embedding", in which case "gpt-4o-mini" is
// used. The temperature is sent only when it is greater than zero.
//
// Errors are returned when no API key is configured, when the request cannot
// be built or sent, when the API reports an error object, when the HTTP
// status is not 2xx, when the body cannot be decoded, or when the response
// contains no choices.
func (c *OpenAIClient) QueryWithContext(ctx context.Context, query string, contextDocs []string) (string, error) {
	if c.config.APIKey == "" {
		return "", fmt.Errorf("OpenAI API key not configured")
	}

	model := c.config.Model
	if model == "" || strings.Contains(model, "embedding") {
		model = "gpt-4o-mini"
	}

	// Build context
	contextText := strings.Join(contextDocs, "\n\n---\n\n")

	// Build messages
	messages := []ChatMessage{
		{
			Role: "system",
			Content: "You are a helpful assistant that answers questions based on the provided context. " +
				"Use the context to provide accurate and relevant answers. " +
				"If the context doesn't contain relevant information, say so.",
		},
		{
			Role:    "user",
			Content: fmt.Sprintf("Context:\n%s\n\nQuestion: %s", contextText, query),
		},
	}

	req := ChatRequest{
		Model:    model,
		Messages: messages,
	}
	if c.config.Temperature > 0 {
		req.Temperature = c.config.Temperature
	}

	body, err := json.Marshal(req)
	if err != nil {
		return "", fmt.Errorf("failed to marshal request: %w", err)
	}

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.config.BaseURL+"/chat/completions", strings.NewReader(string(body)))
	if err != nil {
		return "", fmt.Errorf("failed to create request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")
	httpReq.Header.Set("Authorization", "Bearer "+c.config.APIKey)

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return "", fmt.Errorf("request failed: %w", err)
	}
	defer resp.Body.Close()

	var chatResp ChatResponse
	decodeErr := json.NewDecoder(resp.Body).Decode(&chatResp)

	// Prefer the API's own error object when the body carried one.
	if decodeErr == nil && chatResp.Error != nil {
		return "", chatResp.Error
	}
	// A non-2xx status without a decodable error object is reported by
	// status instead of as a confusing decode failure.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return "", fmt.Errorf("chat completion request failed: unexpected status %s", resp.Status)
	}
	if decodeErr != nil {
		return "", fmt.Errorf("failed to decode response: %w", decodeErr)
	}

	if len(chatResp.Choices) == 0 {
		return "", fmt.Errorf("no response generated")
	}

	return chatResp.Choices[0].Message.Content, nil
}
// MockClient implements AI operations without external API calls.
type MockClient struct {
	dimensions int
}

// NewMockClient creates a mock client for testing.
//
// dimensions is the length of every vector Embed produces. A negative value
// is clamped to zero so that Embed cannot panic while allocating vectors.
func NewMockClient(dimensions int) *MockClient {
	if dimensions < 0 {
		dimensions = 0
	}
	return &MockClient{dimensions: dimensions}
}

// Embed generates mock embeddings.
//
// The result has one vector per input text. Values depend only on the text's
// position i and the component index j, as (i*100+j)/(dimensions*100), so
// the output is deterministic and independent of the text content. The
// context is not consulted and the error is always nil.
func (c *MockClient) Embed(ctx context.Context, texts []string) ([][]float32, error) {
	embeddings := make([][]float32, len(texts))
	scale := float32(c.dimensions * 100)
	for i := range texts {
		embedding := make([]float32, c.dimensions)
		for j := range embedding {
			embedding[j] = float32(i*100+j) / scale
		}
		embeddings[i] = embedding
	}
	return embeddings, nil
}

// QueryWithContext returns a mock response.
//
// The arguments are ignored; the same fixed string is returned every time.
func (c *MockClient) QueryWithContext(ctx context.Context, query string, contextDocs []string) (string, error) {
	return "This is a mock response.", nil
}
File diff suppressed because it is too large Load Diff
+271
View File
@@ -0,0 +1,271 @@
// Package indexer provides document indexing and embedding generation.
package indexer
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"strings"
"sync"
"github.com/yourorg/devour/internal/scraper"
"github.com/yourorg/devour/internal/vector"
)
// Config holds indexer configuration.
//
// Every field carries a yaml tag. Within the code visible here, only
// ChunkSize (chunkDocument) and Dimensions and Model (Stats) are read.
type Config struct {
	Provider string `yaml:"provider"`
	Model string `yaml:"model"`
	Dimensions int `yaml:"dimensions"`
	APIKey string `yaml:"api_key"`
	BatchSize int `yaml:"batch_size"`
	// Chunking settings
	// ChunkSize is the size budget per chunk; chunkDocument compares it
	// against byte lengths and substitutes 512 when it is zero.
	ChunkSize int `yaml:"chunk_size"`
	// NOTE(review): ChunkOverlap is not referenced by the chunking code
	// visible in this section — confirm whether it is used elsewhere.
	ChunkOverlap int `yaml:"chunk_overlap"`
}
// Chunk represents a text chunk with its embedding.
//
// ID is derived from DocID and Position by generateChunkID. Vector is
// omitted from JSON when empty. Chunks returned by Search carry only ID,
// Content and Metadata.
type Chunk struct {
	ID string `json:"id"`
	DocID string `json:"doc_id"`
	Content string `json:"content"`
	Vector []float32 `json:"vector,omitempty"`
	Metadata map[string]any `json:"metadata"`
	Position int `json:"position"`
}
// Indexer handles document indexing with embeddings.
//
// embedder is nil until SetEmbedder is called; Index and Search return an
// error while it is unset. mu is held around every access to chunks in
// indexDocument, Delete and Stats.
type Indexer struct {
	config *Config
	vectorStore vector.Store
	embedder Embedder
	mu sync.RWMutex
	chunks map[string][]*Chunk // docID -> chunks
}
// Embedder generates embeddings for text.
//
// The indexer reads the result positionally: entry j of the returned slice
// is taken as the embedding of texts[j].
type Embedder interface {
	Embed(ctx context.Context, texts []string) ([][]float32, error)
}
// NewIndexer builds an Indexer that uses config for its settings and a
// vector store constructed from vectorConfig. The chunk registry starts out
// empty and no embedder is set; call SetEmbedder before indexing.
func NewIndexer(config *Config, vectorConfig *vector.Config) *Indexer {
	idx := new(Indexer)
	idx.config = config
	idx.vectorStore = vector.NewStore(vectorConfig)
	idx.chunks = map[string][]*Chunk{}
	return idx
}
// SetEmbedder sets the embedding provider.
//
// Index and Search return an "embedder not configured" error until this has
// been called with a non-nil embedder. The field is assigned without taking
// the indexer's mutex.
func (i *Indexer) SetEmbedder(embedder Embedder) {
	i.embedder = embedder
}
// Index chunks, embeds and stores each document in docs, in order.
//
// It fails immediately when no embedder has been set. Processing stops at
// the first document that cannot be indexed; the returned error names that
// document's ID and wraps the underlying cause. Documents indexed before the
// failure are not rolled back.
func (i *Indexer) Index(ctx context.Context, docs []*scraper.Document) error {
	if i.embedder == nil {
		return fmt.Errorf("embedder not configured")
	}

	for idx := 0; idx < len(docs); idx++ {
		doc := docs[idx]
		err := i.indexDocument(ctx, doc)
		if err == nil {
			continue
		}
		return fmt.Errorf("failed to index document %s: %w", doc.ID, err)
	}

	return nil
}
// indexDocument processes a single document.
//
// The document is split into chunks, all chunk texts are embedded in one
// Embed call, the resulting vectors are added to the vector store, and the
// chunks are finally recorded under doc.ID. An error is returned when
// embedding fails, when the embedder returns a different number of vectors
// than there are chunks, or when the vector store rejects the documents; in
// all of these cases the chunk registry is left untouched.
func (i *Indexer) indexDocument(ctx context.Context, doc *scraper.Document) error {
	// Chunk the document
	chunks := i.chunkDocument(doc)

	// Generate embeddings for all chunks
	texts := make([]string, len(chunks))
	for j, chunk := range chunks {
		texts[j] = chunk.Content
	}

	embeddings, err := i.embedder.Embed(ctx, texts)
	if err != nil {
		return fmt.Errorf("failed to generate embeddings: %w", err)
	}
	// Vectors are matched to chunks by position, so a short (or long) result
	// would otherwise panic or silently mis-assign vectors.
	if len(embeddings) != len(chunks) {
		return fmt.Errorf("embedder returned %d embeddings for %d chunks", len(embeddings), len(chunks))
	}

	// Create vector documents
	vecDocs := make([]*vector.Document, len(chunks))
	for j, chunk := range chunks {
		chunk.Vector = embeddings[j]
		vecDocs[j] = &vector.Document{
			ID:       chunk.ID,
			Content:  chunk.Content,
			Vector:   chunk.Vector,
			Metadata: chunk.Metadata,
		}
	}

	// Add to vector store
	if err := i.vectorStore.Add(ctx, vecDocs); err != nil {
		return fmt.Errorf("failed to add to vector store: %w", err)
	}

	// Store chunks
	i.mu.Lock()
	i.chunks[doc.ID] = chunks
	i.mu.Unlock()

	return nil
}
// chunkDocument splits doc.Content into chunks along blank-line paragraph
// boundaries.
//
// Paragraphs are accumulated, joined by "\n\n", until adding the next one
// would push the accumulated size past the configured ChunkSize (512 when
// unset); the accumulated text is then emitted and a new chunk starts with
// that paragraph. Size is measured with len, i.e. in bytes, and separators
// are not counted. A single paragraph larger than the limit is never split.
// When nothing was emitted, one chunk holding the whole content is returned.
func (i *Indexer) chunkDocument(doc *scraper.Document) []*Chunk {
	limit := i.config.ChunkSize
	if limit == 0 {
		limit = 512 // Default chunk size in tokens (approximate)
	}

	var (
		chunks   []*Chunk
		current  strings.Builder
		pending  int // bytes of paragraph text accumulated in current
		position int
	)

	for _, para := range strings.Split(doc.Content, "\n\n") {
		size := len(para) // Approximate token count

		// The paragraph does not fit and there is something to flush.
		if pending != 0 && pending+size > limit {
			chunks = append(chunks, i.createChunk(doc, current.String(), position))
			position++

			current.Reset()
			current.WriteString(para)
			pending = size
			continue
		}

		// The paragraph fits, or it is the first one of a fresh chunk.
		if pending > 0 {
			current.WriteString("\n\n")
		}
		current.WriteString(para)
		pending += size
	}

	// Emit whatever is still buffered.
	if pending > 0 {
		chunks = append(chunks, i.createChunk(doc, current.String(), position))
	}

	// Nothing emitted at all: fall back to one chunk with the full content.
	if len(chunks) == 0 {
		chunks = append(chunks, i.createChunk(doc, doc.Content, 0))
	}

	return chunks
}
// createChunk wraps content as the chunk at the given position of doc.
//
// The chunk ID is derived from doc.ID and position, and the metadata map
// copies the document's source, title, URL, type and timestamp alongside
// the position.
func (i *Indexer) createChunk(doc *scraper.Document, content string, position int) *Chunk {
	meta := make(map[string]any, 6)
	meta["source"] = doc.Source
	meta["title"] = doc.Title
	meta["url"] = doc.URL
	meta["type"] = doc.Type
	meta["position"] = position
	meta["timestamp"] = doc.Timestamp

	chunk := &Chunk{
		DocID:    doc.ID,
		Content:  content,
		Metadata: meta,
		Position: position,
	}
	chunk.ID = generateChunkID(doc.ID, position)
	return chunk
}
// Search finds similar chunks to the query.
//
// The query is embedded with the configured embedder, the vector store is
// asked for up to limit results, and results whose score is below threshold
// are dropped. Returned chunks carry only ID, Content and Metadata. An error
// is returned when no embedder is set, when embedding fails or yields no
// vector, or when the vector store search fails.
func (i *Indexer) Search(ctx context.Context, query string, limit int, threshold float64) ([]*Chunk, error) {
	if i.embedder == nil {
		return nil, fmt.Errorf("embedder not configured")
	}

	// Generate embedding for query
	embeddings, err := i.embedder.Embed(ctx, []string{query})
	if err != nil {
		return nil, fmt.Errorf("failed to embed query: %w", err)
	}
	// Guard the index below: an embedder that returns an empty result would
	// otherwise cause a panic.
	if len(embeddings) == 0 {
		return nil, fmt.Errorf("failed to embed query: embedder returned no embedding")
	}

	// Search vector store
	results, err := i.vectorStore.Search(ctx, embeddings[0], limit)
	if err != nil {
		return nil, fmt.Errorf("search failed: %w", err)
	}

	// Convert to chunks and filter by threshold
	var chunks []*Chunk
	for _, result := range results {
		if result.Score >= threshold {
			chunks = append(chunks, &Chunk{
				ID:       result.ID,
				Content:  result.Content,
				Metadata: result.Metadata,
			})
		}
	}

	return chunks, nil
}
// Delete removes the chunks recorded for docID from the vector store and
// forgets the document.
//
// Deletion is best-effort: a chunk that cannot be removed from the vector
// store is reported with a warning on standard output and the remaining
// chunks are still processed. The method always returns nil, including for
// unknown document IDs. The indexer's write lock is held for the whole call.
func (i *Indexer) Delete(ctx context.Context, docID string) error {
	i.mu.Lock()
	defer i.mu.Unlock()

	docChunks, known := i.chunks[docID]
	if known {
		// Delete chunks from vector store
		for idx := range docChunks {
			id := docChunks[idx].ID
			if err := i.vectorStore.Delete(ctx, id); err != nil {
				fmt.Printf("Warning: failed to delete chunk %s: %v\n", id, err)
			}
		}
		delete(i.chunks, docID)
	}

	return nil
}
// Stats reports indexing statistics as a map with the keys
// "document_count" (documents tracked by this indexer), "chunk_count" (the
// vector store's own count), "dimensions" and "model" (both taken from the
// configuration). The vector store's error is returned unchanged.
func (i *Indexer) Stats(ctx context.Context) (map[string]interface{}, error) {
	chunkTotal, err := i.vectorStore.Count(ctx)
	if err != nil {
		return nil, err
	}

	// Read the registry size under the read lock only.
	docTotal := func() int {
		i.mu.RLock()
		defer i.mu.RUnlock()
		return len(i.chunks)
	}()

	stats := make(map[string]interface{}, 4)
	stats["document_count"] = docTotal
	stats["chunk_count"] = chunkTotal
	stats["dimensions"] = i.config.Dimensions
	stats["model"] = i.config.Model
	return stats, nil
}
// generateChunkID derives a chunk identifier from the document ID and the
// chunk's position: the first 12 bytes of SHA-256("<docID>-<position>"),
// rendered as 24 lowercase hex characters.
func generateChunkID(docID string, position int) string {
	hasher := sha256.New()
	// Writing to a hash never fails, so the result of Fprintf is not checked.
	fmt.Fprintf(hasher, "%s-%d", docID, position)
	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest[:12])
}
+182
View File
@@ -0,0 +1,182 @@
package markdown
import (
	"bytes"
	"fmt"
	"regexp"
	"sort"
	"strings"
	"time"

	"github.com/yuin/goldmark"
	"github.com/yuin/goldmark/extension"
	"github.com/yuin/goldmark/parser"
	"github.com/yuin/goldmark/renderer/html"
)
// Document represents a scraped document to be formatted as markdown
//
// ToMarkdown renders Title, URL, Type, Timestamp, the Metadata entries and
// Content; the remaining fields are not read by the formatter code visible
// in this file.
type Document struct {
	ID string `json:"id"`
	Source string `json:"source"`
	Type string `json:"type"`
	Title string `json:"title"`
	Content string `json:"content"`
	URL string `json:"url"`
	Metadata map[string]interface{} `json:"metadata"`
	Hash string `json:"hash"`
	Timestamp time.Time `json:"timestamp"`
}
// Formatter handles markdown conversion and enhancement
//
// md is the goldmark instance configured by NewFormatter.
// NOTE(review): none of the methods visible in this section call md —
// confirm whether it is used elsewhere.
type Formatter struct {
	md goldmark.Markdown
}
// NewFormatter returns a Formatter whose goldmark instance has the GFM,
// Table and Strikethrough extensions enabled, assigns heading IDs
// automatically while parsing, and renders XHTML with hard line wraps.
func NewFormatter() *Formatter {
	extensions := goldmark.WithExtensions(extension.GFM, extension.Table, extension.Strikethrough)
	parserOpts := goldmark.WithParserOptions(parser.WithAutoHeadingID())
	rendererOpts := goldmark.WithRendererOptions(html.WithHardWraps(), html.WithXHTML())

	return &Formatter{md: goldmark.New(extensions, parserOpts, rendererOpts)}
}
// ToMarkdown converts a document to enhanced markdown format
//
// The output consists of a title heading, a "Document Information" table
// (source URL, type, scrape time and one row per non-empty metadata entry),
// the enhanced content under a "Content" heading, and a footer. Metadata
// rows are written in sorted key order so the same document always renders
// to the same text; ranging over the map directly would reorder the rows
// from run to run.
func (f *Formatter) ToMarkdown(doc *Document) string {
	var buf bytes.Buffer
	scraped := doc.Timestamp.Format("2006-01-02 15:04:05")

	// Header with metadata
	fmt.Fprintf(&buf, "# %s\n\n", doc.Title)

	// Document metadata table
	buf.WriteString("## 📋 Document Information\n\n")
	buf.WriteString("| Property | Value |\n")
	buf.WriteString("|----------|-------|\n")
	fmt.Fprintf(&buf, "| **Source** | %s |\n", doc.URL)
	fmt.Fprintf(&buf, "| **Type** | `%s` |\n", doc.Type)
	fmt.Fprintf(&buf, "| **Scraped** | %s |\n", scraped)

	// Add metadata if available, in a stable order.
	keys := make([]string, 0, len(doc.Metadata))
	for key := range doc.Metadata {
		keys = append(keys, key)
	}
	sort.Strings(keys)
	for _, key := range keys {
		strValue := fmt.Sprintf("%v", doc.Metadata[key])
		if strValue == "" || strValue == "<nil>" {
			continue
		}
		// strings.Title is deprecated but kept: its replacement lives
		// outside the standard library.
		fmt.Fprintf(&buf, "| **%s** | %s |\n", strings.Title(key), strValue)
	}
	buf.WriteString("\n")

	// Process and enhance content
	buf.WriteString("## 📚 Content\n\n")
	buf.WriteString(f.enhanceContent(doc.Content))
	buf.WriteString("\n")

	// Footer
	buf.WriteString("---\n\n")
	fmt.Fprintf(&buf, "*Document scraped by [Devour](https://github.com/yourorg/devour) on %s*\n", scraped)
	fmt.Fprintf(&buf, "*Source: [%s](%s)*\n", doc.URL, doc.URL)

	return buf.String()
}
// Patterns and replacements used by enhanceContent. They are built once at
// package initialisation instead of on every call.
var (
	enhanceBlankRunRe = regexp.MustCompile(`\n{3,}`)
	enhanceHeadingRe  = regexp.MustCompile(`([^\n])\n(#{1,6})`)
	enhanceURLRe      = regexp.MustCompile(`(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s\!()\[\]{};:'".,<>?«»""'']))`)

	enhanceCalloutReplacer = strings.NewReplacer(
		"Example:", "💡 **Example:**",
		"Note:", "📝 **Note:**",
		"Warning:", "⚠️ **Warning:**",
		"Important:", "❗ **Important:**",
		"TODO:", "📋 **TODO:**",
	)
)

// enhanceContent improves the readability of scraped content
//
// It trims surrounding whitespace, collapses runs of three or more newlines
// to a blank line, inserts a blank line before a run of '#' that directly
// follows a non-empty line, turns bare http(s) URLs into markdown links and
// decorates the markers "Example:", "Note:", "Warning:", "Important:" and
// "TODO:" with an emoji and bold text.
//
// URLs that are already part of a markdown link, an autolink or a quoted
// HTML attribute are left alone; wrapping them again would corrupt the
// existing markup. The earlier fenced-code-block rewrite is omitted because
// it reassembled each match into the identical text.
//
// NOTE(review): the heading, URL and marker rewrites also apply inside
// fenced code blocks; this was already the case before.
func (f *Formatter) enhanceContent(content string) string {
	// Clean up common issues
	content = strings.TrimSpace(content)

	// Fix multiple consecutive newlines
	content = enhanceBlankRunRe.ReplaceAllString(content, "\n\n")

	// Ensure proper heading spacing
	content = enhanceHeadingRe.ReplaceAllString(content, "$1\n\n$2")

	// Convert plain URLs to markdown links
	content = linkifyBareURLs(content)

	// Add emoji indicators for common patterns
	return enhanceCalloutReplacer.Replace(content)
}

// linkifyBareURLs wraps every bare http(s) URL in content as [url](url).
func linkifyBareURLs(content string) string {
	matches := enhanceURLRe.FindAllStringIndex(content, -1)
	if len(matches) == 0 {
		return content
	}

	var out strings.Builder
	out.Grow(len(content))
	last := 0
	for _, m := range matches {
		start, end := m[0], m[1]
		url := content[start:end]

		out.WriteString(content[last:start])
		if strings.HasPrefix(url, "http") && !precededByLinkSyntax(content[:start]) {
			out.WriteString("[" + url + "](" + url + ")")
		} else {
			out.WriteString(url)
		}
		last = end
	}
	out.WriteString(content[last:])
	return out.String()
}

// precededByLinkSyntax reports whether the text before a URL shows that the
// URL already sits inside link markup: a markdown link target or text, an
// autolink, or a quoted HTML attribute value.
func precededByLinkSyntax(prefix string) bool {
	for _, marker := range []string{"](", "[", "<", `="`, `='`} {
		if strings.HasSuffix(prefix, marker) {
			return true
		}
	}
	return false
}
// tocAnchorStripRe removes every character that may not appear in a
// generated anchor. Compiled once instead of once per heading.
var tocAnchorStripRe = regexp.MustCompile(`[^a-z0-9\-]`)

// ToTableOfContents generates a TOC for the document
//
// Every heading of level 1 to 3 in content becomes one list entry that links
// to an anchor derived from the title (lower-cased, spaces replaced by '-',
// all other characters outside a-z, 0-9 and '-' removed). Entries are
// indented two spaces per level below the first; a sub-list must be indented
// at least as far as its parent's content ("- " is two columns) to nest.
//
// A line counts as a heading only when its leading '#' run is followed by
// whitespace and a non-empty title. Lines inside fenced code blocks are
// skipped, so shell or Python comments are not mistaken for headings. An
// empty string is returned when no heading is found.
func (f *Formatter) ToTableOfContents(content string) string {
	var toc []string
	inFence := false

	for _, line := range strings.Split(content, "\n") {
		line = strings.TrimSpace(line)

		// Track fenced code blocks; their contents are never headings.
		if strings.HasPrefix(line, "```") || strings.HasPrefix(line, "~~~") {
			inFence = !inFence
			continue
		}
		if inFence || !strings.HasPrefix(line, "#") {
			continue
		}

		// Count heading level
		level := len(line) - len(strings.TrimLeft(line, "#"))
		if level > 3 { // Only include up to H3 in TOC
			continue
		}

		rest := line[level:]
		// "#tag" is not a heading: whitespace must follow the hashes.
		if rest != "" && rest[0] != ' ' && rest[0] != '\t' {
			continue
		}
		title := strings.TrimSpace(rest)
		if title == "" {
			continue
		}

		anchor := strings.ToLower(strings.ReplaceAll(title, " ", "-"))
		anchor = tocAnchorStripRe.ReplaceAllString(anchor, "")
		indent := strings.Repeat("  ", level-1)
		toc = append(toc, fmt.Sprintf("%s- [%s](#%s)", indent, title, anchor))
	}

	if len(toc) == 0 {
		return ""
	}

	return "## 📑 Table of Contents\n\n" + strings.Join(toc, "\n") + "\n\n"
}
// FormatWithTOC renders doc with ToMarkdown and inserts a table of contents
// immediately before the first "## 📚 Content" heading. The table is built
// from everything that follows that heading. When the heading cannot be
// found, the rendered markdown is returned unchanged.
func (f *Formatter) FormatWithTOC(doc *Document) string {
	const contentHeading = "## 📚 Content"

	rendered := f.ToMarkdown(doc)
	at := strings.Index(rendered, contentHeading)
	if at < 0 {
		return rendered
	}

	head := rendered[:at]
	tail := rendered[at+len(contentHeading):]
	return head + f.ToTableOfContents(tail) + contentHeading + tail
}
+427
View File
@@ -0,0 +1,427 @@
package analyzers
import (
"context"
"fmt"
"go/ast"
"go/token"
"strings"
"github.com/yourorg/devour/internal/quality"
"golang.org/x/tools/go/packages"
)
// ControlFlowAnalyzer reports control-flow findings for Go code: functions
// that exceed the thresholds below, plus the switch, loop, range and
// unreachable-code checks implemented by its methods.
type ControlFlowAnalyzer struct {
	// maxComplexity is the cyclomatic complexity above which a function is reported.
	maxComplexity int
	// maxNesting is the nesting depth above which a function is reported.
	maxNesting int
	// maxFunctionLength is the line count above which a function is reported.
	maxFunctionLength int
}
// NewControlFlowAnalyzer returns an analyzer with the built-in thresholds:
// cyclomatic complexity 15, nesting depth 4 and function length 50 lines.
func NewControlFlowAnalyzer() *ControlFlowAnalyzer {
	analyzer := new(ControlFlowAnalyzer)
	analyzer.maxComplexity = 15
	analyzer.maxNesting = 4
	analyzer.maxFunctionLength = 50
	return analyzer
}
// Name returns the analyzer's identifier, "controlflow".
func (a *ControlFlowAnalyzer) Name() string {
	const analyzerName = "controlflow"
	return analyzerName
}
// Severity returns the severity tier associated with this analyzer as a
// whole (T3). Individual findings carry their own severity.
func (a *ControlFlowAnalyzer) Severity() quality.Severity {
	tier := quality.SeverityT3
	return tier
}
// Detect loads every package below path ("./...") with syntax trees and
// returns the control-flow findings for all of their files.
//
// ctx is handed to the package loader so that cancelling it aborts the load;
// it was previously accepted but ignored. config is not consulted by this
// analyzer. An error is returned only when the packages cannot be loaded.
func (a *ControlFlowAnalyzer) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	cfg := &packages.Config{
		Mode:    packages.NeedName | packages.NeedFiles | packages.NeedSyntax,
		Dir:     path,
		Context: ctx,
	}

	pkgs, err := packages.Load(cfg, "./...")
	if err != nil {
		return nil, fmt.Errorf("failed to load packages: %w", err)
	}

	var findings []quality.Finding
	for _, pkg := range pkgs {
		for _, file := range pkg.Syntax {
			// The file's own position yields the name used in findings.
			pos := pkg.Fset.Position(file.Pos())
			findings = append(findings, a.analyzeFile(pkg.Fset, file, pos.Filename)...)
		}
	}

	return findings, nil
}
// analyzeFile walks every node of file and dispatches function declarations,
// if statements, expression switches, for loops and range loops to their
// dedicated checks, collecting all resulting findings in visit order.
func (a *ControlFlowAnalyzer) analyzeFile(fset *token.FileSet, file *ast.File, filename string) []quality.Finding {
	var collected []quality.Finding

	visit := func(n ast.Node) bool {
		var batch []quality.Finding
		switch typed := n.(type) {
		case *ast.FuncDecl:
			batch = a.analyzeFunction(fset, typed, filename)
		case *ast.IfStmt:
			batch = a.checkUnreachableCode(fset, typed, filename)
		case *ast.SwitchStmt:
			batch = a.analyzeSwitch(fset, typed, filename)
		case *ast.ForStmt:
			batch = a.analyzeLoop(fset, typed, filename)
		case *ast.RangeStmt:
			batch = a.analyzeRange(fset, typed, filename)
		}
		collected = append(collected, batch...)
		// Always descend: nested statements are checked too.
		return true
	}

	ast.Inspect(file, visit)
	return collected
}
// analyzeFunction checks one function declaration against the analyzer's
// thresholds and returns a finding for each that is exceeded: cyclomatic
// complexity, length in lines (declaration start to end, inclusive) and
// nesting depth. The early-return check is appended last.
//
// Declarations without a body (for example functions implemented in
// assembly) yield no findings; walking their nil body would panic.
func (a *ControlFlowAnalyzer) analyzeFunction(fset *token.FileSet, fn *ast.FuncDecl, filename string) []quality.Finding {
	if fn.Body == nil {
		return nil
	}

	var findings []quality.Finding

	name := fn.Name.Name
	complexity := a.calculateCyclomaticComplexity(fn.Body)
	startPos := fset.Position(fn.Pos())
	endPos := fset.Position(fn.End())
	loc := endPos.Line - startPos.Line + 1

	if complexity > a.maxComplexity {
		severity := quality.SeverityT3
		score := complexity - a.maxComplexity

		// More than twice the limit: escalate and double the score.
		if complexity > a.maxComplexity*2 {
			severity = quality.SeverityT4
			score = (complexity - a.maxComplexity) * 2
		}

		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("cyclomatic-complexity::%s::%d", filename, startPos.Line),
			Type:        "complexity",
			Title:       fmt.Sprintf("High cyclomatic complexity in %s", name),
			Description: fmt.Sprintf("Function '%s' has cyclomatic complexity of %d (max: %d). Consider breaking it into smaller functions.", name, complexity, a.maxComplexity),
			File:        filename,
			Line:        startPos.Line,
			EndLine:     endPos.Line,
			Severity:    severity,
			Score:       score,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"function":   name,
				"complexity": fmt.Sprintf("%d", complexity),
				"max":        fmt.Sprintf("%d", a.maxComplexity),
			},
		})
	}

	if loc > a.maxFunctionLength {
		severity := quality.SeverityT2
		if loc > a.maxFunctionLength*2 {
			severity = quality.SeverityT3
		}

		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("function-length::%s::%d", filename, startPos.Line),
			Type:        "complexity",
			Title:       fmt.Sprintf("Function too long: %s", name),
			Description: fmt.Sprintf("Function '%s' is %d lines (max: %d). Consider breaking it into smaller functions.", name, loc, a.maxFunctionLength),
			File:        filename,
			Line:        startPos.Line,
			Severity:    severity,
			Score:       (loc - a.maxFunctionLength) / 10,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"function": name,
				"loc":      fmt.Sprintf("%d", loc),
				"max":      fmt.Sprintf("%d", a.maxFunctionLength),
			},
		})
	}

	maxNesting := a.calculateMaxNesting(fn.Body)
	if maxNesting > a.maxNesting {
		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("deep-nesting::%s::%d", filename, startPos.Line),
			Type:        "complexity",
			Title:       fmt.Sprintf("Deep nesting in %s", name),
			Description: fmt.Sprintf("Function '%s' has nesting depth of %d (max: %d). Extract nested code into separate functions.", name, maxNesting, a.maxNesting),
			File:        filename,
			Line:        startPos.Line,
			Severity:    quality.SeverityT3,
			Score:       maxNesting - a.maxNesting,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"function": name,
				"nesting":  fmt.Sprintf("%d", maxNesting),
			},
		})
	}

	findings = append(findings, a.checkEarlyReturn(fset, fn, filename)...)

	return findings
}
// calculateCyclomaticComplexity returns 1 plus the number of decision points
// found below node: every if, for, range and case clause (default clauses
// included) and every && or || operator.
func (a *ControlFlowAnalyzer) calculateCyclomaticComplexity(node ast.Node) int {
	score := 1

	ast.Inspect(node, func(n ast.Node) bool {
		switch expr := n.(type) {
		case *ast.IfStmt, *ast.ForStmt, *ast.RangeStmt, *ast.CaseClause:
			score++
		case *ast.BinaryExpr:
			// Only short-circuit operators add a branch.
			if expr.Op == token.LAND || expr.Op == token.LOR {
				score++
			}
		}
		return true
	})

	return score
}
// calculateMaxNesting returns the deepest nesting of control statements
// (if, for, range, select, switch, type switch) found below node, where a
// statement directly inside node counts as depth 1. A nil node yields 0.
func (a *ControlFlowAnalyzer) calculateMaxNesting(node ast.Node) int {
	return a.nestingDepth(node, 0)
}

// nestingDepth returns the maximum nesting depth below node, given that node
// itself sits at depth current.
//
// Each control statement is handled exactly once: its body is measured one
// level deeper, its remaining parts (init, condition, ...) at the current
// level, and the walk does not descend into it again. This keeps the
// traversal linear; continuing the walk after recursing re-visited every
// subtree once per enclosing statement. The block of a plain "else" counts
// as one level deeper, like the "if" body, while an "else if" stays on the
// level of the "if" it belongs to.
func (a *ControlFlowAnalyzer) nestingDepth(node ast.Node, current int) int {
	maxDepth := current

	// ast.Inspect panics on a nil node, and a bodiless function arrives here
	// as a typed-nil *ast.BlockStmt wrapped in a non-nil interface.
	if node == nil {
		return maxDepth
	}
	if block, ok := node.(*ast.BlockStmt); ok && block == nil {
		return maxDepth
	}

	measure := func(child ast.Node, depth int) {
		if d := a.nestingDepth(child, depth); d > maxDepth {
			maxDepth = d
		}
	}

	ast.Inspect(node, func(n ast.Node) bool {
		switch stmt := n.(type) {
		case *ast.IfStmt:
			measure(stmt.Init, current)
			measure(stmt.Cond, current)
			measure(stmt.Body, current+1)
			switch els := stmt.Else.(type) {
			case *ast.IfStmt:
				measure(els, current)
			case *ast.BlockStmt:
				measure(els, current+1)
			}
		case *ast.ForStmt:
			measure(stmt.Init, current)
			measure(stmt.Cond, current)
			measure(stmt.Post, current)
			measure(stmt.Body, current+1)
		case *ast.RangeStmt:
			measure(stmt.Key, current)
			measure(stmt.Value, current)
			measure(stmt.X, current)
			measure(stmt.Body, current+1)
		case *ast.SelectStmt:
			measure(stmt.Body, current+1)
		case *ast.SwitchStmt:
			measure(stmt.Init, current)
			measure(stmt.Tag, current)
			measure(stmt.Body, current+1)
		case *ast.TypeSwitchStmt:
			measure(stmt.Init, current)
			measure(stmt.Assign, current)
			measure(stmt.Body, current+1)
		default:
			// Not a nesting construct: keep walking its children.
			return true
		}
		// All children were measured above; do not walk them again.
		return false
	})

	return maxDepth
}
// checkEarlyReturn suggests the early-return pattern for a function whose
// first statement is an if with a plain else block (not an else-if) and
// which has at least one further statement after it. Functions without a
// body or with fewer than two statements are never reported.
func (a *ControlFlowAnalyzer) checkEarlyReturn(fset *token.FileSet, fn *ast.FuncDecl, filename string) []quality.Finding {
	if fn.Body == nil {
		return nil
	}
	stmts := fn.Body.List
	if len(stmts) < 2 {
		return nil
	}

	leading, isIf := stmts[0].(*ast.IfStmt)
	if !isIf || leading.Else == nil {
		return nil
	}
	if _, plainElse := leading.Else.(*ast.BlockStmt); !plainElse {
		return nil
	}

	line := fset.Position(leading.Pos()).Line
	return []quality.Finding{{
		ID:          fmt.Sprintf("early-return::%s::%d", filename, line),
		Type:        "quality",
		Title:       fmt.Sprintf("Use early return pattern in %s", fn.Name.Name),
		Description: "Consider using early return instead of if-else to reduce nesting and improve readability.",
		File:        filename,
		Line:        line,
		Severity:    quality.SeverityT1,
		Score:       1,
		Status:      quality.StatusOpen,
		Metadata: map[string]string{
			"function": fn.Name.Name,
		},
	}}
}
// checkUnreachableCode looks for statements that follow a return inside the
// body of stmt and inside its plain else block, if it has one. An else-if
// is not inspected here; it is an if statement in its own right.
func (a *ControlFlowAnalyzer) checkUnreachableCode(fset *token.FileSet, stmt *ast.IfStmt, filename string) []quality.Finding {
	var found []quality.Finding

	a.checkUnreachableInBranch(fset, stmt.Body, filename, &found)

	elseBlock, hasElseBlock := stmt.Else.(*ast.BlockStmt)
	if hasElseBlock {
		a.checkUnreachableInBranch(fset, elseBlock, filename, &found)
	}

	return found
}
// checkUnreachableInBranch appends one finding to findings when block has a
// statement directly after its first top-level return statement. Only that
// one statement is reported, and only direct children of block are examined.
func (a *ControlFlowAnalyzer) checkUnreachableInBranch(fset *token.FileSet, block *ast.BlockStmt, filename string, findings *[]quality.Finding) {
	// Locate the first top-level return.
	returnAt := -1
	for idx, stmt := range block.List {
		if _, isReturn := stmt.(*ast.ReturnStmt); isReturn {
			returnAt = idx
			break
		}
	}

	// No return, or the return is the last statement: nothing is dead.
	if returnAt < 0 || returnAt+1 >= len(block.List) {
		return
	}

	line := fset.Position(block.List[returnAt+1].Pos()).Line
	*findings = append(*findings, quality.Finding{
		ID:          fmt.Sprintf("unreachable::%s::%d", filename, line),
		Type:        "dead_code",
		Title:       "Unreachable code after return",
		Description: "Code after return statement will never be executed.",
		File:        filename,
		Line:        line,
		Severity:    quality.SeverityT2,
		Score:       3,
		Status:      quality.StatusOpen,
	})
}
// analyzeSwitch reports two conditions on an expression switch: a switch
// that has case clauses but no default clause, and a switch with more than
// ten clauses. The default clause is included in the clause count.
func (a *ControlFlowAnalyzer) analyzeSwitch(fset *token.FileSet, stmt *ast.SwitchStmt, filename string) []quality.Finding {
	line := fset.Position(stmt.Pos()).Line

	clauses := 0
	defaultSeen := false
	for _, entry := range stmt.Body.List {
		clause, isClause := entry.(*ast.CaseClause)
		if !isClause {
			continue
		}
		clauses++
		// A clause without expressions is the default clause.
		if clause.List == nil {
			defaultSeen = true
		}
	}

	var findings []quality.Finding

	if clauses > 0 && !defaultSeen {
		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("switch-no-default::%s::%d", filename, line),
			Type:        "quality",
			Title:       "Switch without default case",
			Description: "Switch statement lacks a default case. Consider handling unexpected values explicitly.",
			File:        filename,
			Line:        line,
			Severity:    quality.SeverityT1,
			Score:       1,
			Status:      quality.StatusOpen,
		})
	}

	if clauses > 10 {
		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("switch-too-many-cases::%s::%d", filename, line),
			Type:        "complexity",
			Title:       "Switch with too many cases",
			Description: fmt.Sprintf("Switch has %d cases. Consider using a map or polymorphism instead.", clauses),
			File:        filename,
			Line:        line,
			Severity:    quality.SeverityT2,
			Score:       clauses / 5,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"case_count": fmt.Sprintf("%d", clauses),
			},
		})
	}

	return findings
}
// analyzeLoop reports two conditions on a for statement: a loop with neither
// a condition nor a post statement (a potential infinite loop), and a loop
// condition that compares something to the literal true with ==.
//
// The comparison is detected on the syntax tree. Formatting the condition
// with %v prints pointer addresses rather than source text, so a textual
// search for "== true" could never match. When the compared operand is a
// plain identifier or selector chain, it is recorded in the finding's
// metadata under "operand".
func (a *ControlFlowAnalyzer) analyzeLoop(fset *token.FileSet, stmt *ast.ForStmt, filename string) []quality.Finding {
	var findings []quality.Finding
	pos := fset.Position(stmt.Pos())

	if stmt.Cond == nil && stmt.Post == nil {
		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("infinite-loop::%s::%d", filename, pos.Line),
			Type:        "quality",
			Title:       "Potential infinite loop",
			Description: "For loop has no condition and no post statement. Ensure there's a break inside.",
			File:        filename,
			Line:        pos.Line,
			Severity:    quality.SeverityT3,
			Score:       4,
			Status:      quality.StatusOpen,
		})
	}

	if operand, found := loopTrueComparison(stmt.Cond); found {
		finding := quality.Finding{
			ID:          fmt.Sprintf("redundant-bool-compare::%s::%d", filename, pos.Line),
			Type:        "quality",
			Title:       "Redundant boolean comparison",
			Description: "Comparing to 'true' is redundant. Use the boolean value directly.",
			File:        filename,
			Line:        pos.Line,
			Severity:    quality.SeverityT1,
			Score:       1,
			Status:      quality.StatusOpen,
		}
		if operand != "" {
			finding.Metadata = map[string]string{"operand": operand}
		}
		findings = append(findings, finding)
	}

	return findings
}

// loopTrueComparison reports whether cond contains a comparison of the form
// "<expr> == true" and, if so, returns the dotted name of <expr> when it is
// an identifier or selector chain (empty otherwise).
func loopTrueComparison(cond ast.Expr) (string, bool) {
	// ast.Inspect must not be handed a nil node.
	if cond == nil {
		return "", false
	}

	operand, found := "", false
	ast.Inspect(cond, func(n ast.Node) bool {
		if found {
			return false
		}
		cmp, ok := n.(*ast.BinaryExpr)
		if !ok || cmp.Op != token.EQL {
			return true
		}
		if rhs, ok := cmp.Y.(*ast.Ident); ok && rhs.Name == "true" {
			operand, found = loopOperandPath(cmp.X), true
			return false
		}
		return true
	})
	return operand, found
}

// loopOperandPath renders an identifier or selector chain such as a.b.c as
// dotted text, looking through parentheses. Any other expression yields "".
func loopOperandPath(expr ast.Expr) string {
	var reversed []string
	for {
		switch e := expr.(type) {
		case *ast.ParenExpr:
			expr = e.X
		case *ast.SelectorExpr:
			reversed = append(reversed, e.Sel.Name)
			expr = e.X
		case *ast.Ident:
			reversed = append(reversed, e.Name)
			// Names were collected right-to-left; put them in source order.
			for l, r := 0, len(reversed)-1; l < r; l, r = l+1, r-1 {
				reversed[l], reversed[r] = reversed[r], reversed[l]
			}
			return strings.Join(reversed, ".")
		default:
			return ""
		}
	}
}
// analyzeRange reports a range loop whose key is a named identifier (not
// "_") that never appears inside the loop body. The check is by name only:
// any identifier in the body with the same name counts as a use. Loops
// whose key is absent or not a plain identifier are never reported.
func (a *ControlFlowAnalyzer) analyzeRange(fset *token.FileSet, stmt *ast.RangeStmt, filename string) []quality.Finding {
	key, isIdent := stmt.Key.(*ast.Ident)
	if !isIdent || key.Name == "_" || key.Name == "" || stmt.Body == nil {
		return nil
	}

	referenced := false
	ast.Inspect(stmt.Body, func(n ast.Node) bool {
		if id, ok := n.(*ast.Ident); ok && id.Name == key.Name {
			referenced = true
		}
		// Stop descending once a use has been seen.
		return !referenced
	})
	if referenced {
		return nil
	}

	line := fset.Position(stmt.Pos()).Line
	return []quality.Finding{{
		ID:          fmt.Sprintf("unused-range-key::%s::%d", filename, line),
		Type:        "quality",
		Title:       "Unused range key",
		Description: fmt.Sprintf("Range key '%s' is not used. Use '_' to ignore it explicitly.", key.Name),
		File:        filename,
		Line:        line,
		Severity:    quality.SeverityT1,
		Score:       1,
		Status:      quality.StatusOpen,
		Metadata: map[string]string{
			"variable": key.Name,
		},
	}}
}
+471
View File
@@ -0,0 +1,471 @@
package analyzers
import (
"context"
"fmt"
"go/ast"
"go/token"
"go/types"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/yourorg/devour/internal/quality"
"golang.org/x/tools/go/packages"
)
// DataFlowAnalyzer holds the state of the data-flow analysis.
//
// NewDataFlowAnalyzer initialises findings and taintSrcs and then calls
// initTaintSources and initSanitizers.
type DataFlowAnalyzer struct {
	// NOTE(review): fset and pkgs are not set by the constructor; presumably
	// they are filled in when packages are loaded — confirm.
	fset *token.FileSet
	pkgs []*packages.Package
	findings []quality.Finding
	// taintSrcs maps a package path (e.g. "net/http") to its taint sources.
	taintSrcs map[string][]TaintSource
	// NOTE(review): sanitizeFn is not allocated by the constructor;
	// presumably initSanitizers does so — confirm.
	sanitizeFn map[string]bool
}
// TaintSource describes one identifier treated as a source of untrusted
// data. Entries are registered per package by initTaintSources.
type TaintSource struct {
	// Name is the identifier within Package, e.g. "FormValue" or "Getenv".
	Name string
	// Package is the import path that provides Name, e.g. "net/http".
	Package string
	// Category groups sources; the registered values are "http-input",
	// "env", "cli" and "io".
	Category string
	// Description is a human-readable explanation of the source.
	Description string
}
// DataFlowFinding describes one data-flow result: a Source, a Sink, the Path
// between them, the File and Line it was found at, and a Severity and
// Description.
// NOTE(review): no code visible in this section constructs or reads this
// type, so the exact meaning of each field should be confirmed at its use.
type DataFlowFinding struct {
	Source string
	Sink string
	Path []string
	Line int
	File string
	Severity quality.Severity
	Description string
}
// NewDataFlowAnalyzer returns an analyzer with an empty findings list and the
// built-in taint source and sanitizer tables installed.
func NewDataFlowAnalyzer() *DataFlowAnalyzer {
	analyzer := new(DataFlowAnalyzer)
	analyzer.findings = make([]quality.Finding, 0)
	// The source table must exist before initTaintSources writes into it.
	analyzer.taintSrcs = make(map[string][]TaintSource)

	analyzer.initTaintSources()
	analyzer.initSanitizers()
	return analyzer
}
// initTaintSources registers the built-in taint sources, grouped under the
// package key they are looked up by. The taintSrcs map must already be
// allocated.
func (d *DataFlowAnalyzer) initTaintSources() {
	const (
		httpPkg  = "net/http"
		osPkg    = "os"
		bufioPkg = "bufio"
		ioPkg    = "io"
	)

	// source builds one table entry; the argument order mirrors how the
	// entries read: where it lives, what it is called, how it is classed.
	source := func(pkgPath, name, category, description string) TaintSource {
		return TaintSource{Name: name, Package: pkgPath, Category: category, Description: description}
	}

	d.taintSrcs[httpPkg] = []TaintSource{
		source(httpPkg, "FormValue", "http-input", "HTTP form value - user controlled"),
		source(httpPkg, "PostFormValue", "http-input", "HTTP POST form value - user controlled"),
		source(httpPkg, "FormFile", "http-input", "HTTP uploaded file - user controlled"),
		source(httpPkg, "Cookie", "http-input", "HTTP cookie - user controlled"),
		source(httpPkg, "Header", "http-input", "HTTP header - user controlled"),
		source(httpPkg, "URL", "http-input", "Request URL - user controlled"),
		source(httpPkg, "Body", "http-input", "Request body - user controlled"),
	}
	d.taintSrcs[osPkg] = []TaintSource{
		source(osPkg, "Getenv", "env", "Environment variable - environment controlled"),
		source(osPkg, "Args", "cli", "Command line arguments - user controlled"),
		source(osPkg, "Stdin", "io", "Standard input - user controlled"),
	}
	d.taintSrcs[bufioPkg] = []TaintSource{
		source(bufioPkg, "ReadString", "io", "Reader input - potentially user controlled"),
		source(bufioPkg, "ReadBytes", "io", "Reader input - potentially user controlled"),
		source(bufioPkg, "ReadLine", "io", "Reader input - potentially user controlled"),
	}
	d.taintSrcs[ioPkg] = []TaintSource{
		source(ioPkg, "ReadAll", "io", "Read all from reader - potentially user controlled"),
	}
}
// initSanitizers replaces sanitizeFn with the set of qualified function names
// that are regarded as sanitizers.
func (d *DataFlowAnalyzer) initSanitizers() {
	names := []string{
		"html.EscapeString",
		"template.HTMLEscape",
		"template.JSEscape",
		"url.QueryEscape",
		"url.PathEscape",
		"sql.Named",
		"regexp.QuoteMeta",
		"strconv.Quote",
	}

	set := make(map[string]bool, len(names))
	for _, name := range names {
		set[name] = true
	}
	d.sanitizeFn = set
}
// Name returns the analyzer's identifier, "dataflow".
func (d *DataFlowAnalyzer) Name() string {
return "dataflow"
}
// Severity returns the analyzer-level severity, quality.SeverityT3. The
// findings appended in this file set their own severity explicitly.
func (d *DataFlowAnalyzer) Severity() quality.Severity {
return quality.SeverityT3
}
// Detect loads every Go package below path (pattern "./...") with syntax and
// type information, runs the taint analysis on each one and returns the
// findings collected during this call.
//
// ctx is handed to the package loader so a cancelled context aborts the load.
// config is not consulted. An error is returned only when the load itself
// fails; a load that yields no packages returns an empty result.
func (d *DataFlowAnalyzer) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	// Start from a clean slate so a reused analyzer does not hand back the
	// findings of an earlier run a second time.
	d.findings = make([]quality.Finding, 0)

	// Supplying the FileSet ourselves keeps d.fset valid even when the load
	// returns no packages; reading it from pkgs[0] would panic in that case.
	fset := token.NewFileSet()
	cfg := &packages.Config{
		Context: ctx,
		Mode:    packages.NeedName | packages.NeedTypes | packages.NeedTypesInfo | packages.NeedFiles | packages.NeedSyntax,
		Dir:     path,
		Fset:    fset,
	}

	pkgs, err := packages.Load(cfg, "./...")
	if err != nil {
		return nil, fmt.Errorf("failed to load packages: %w", err)
	}

	d.pkgs = pkgs
	d.fset = fset
	for _, pkg := range pkgs {
		d.analyzePackage(pkg)
	}
	return d.findings, nil
}
// analyzePackage runs the per-file analysis on every syntax tree of pkg.
func (d *DataFlowAnalyzer) analyzePackage(pkg *packages.Package) {
	for i := range pkg.Syntax {
		d.analyzeFile(pkg, pkg.Syntax[i])
	}
}
// analyzeFile walks one file and dispatches assignments, calls and value
// specs to their handlers. The taint table is created fresh for each file and
// is keyed by variable name only, so it is shared by every function in the
// file.
func (d *DataFlowAnalyzer) analyzeFile(pkg *packages.Package, file *ast.File) {
	tainted := map[string]TaintSource{}
	propagations := map[string][]string{}

	visit := func(n ast.Node) bool {
		if assign, ok := n.(*ast.AssignStmt); ok {
			d.analyzeAssignment(pkg, assign, tainted, propagations)
		} else if call, ok := n.(*ast.CallExpr); ok {
			d.analyzeCall(pkg, call, tainted, file)
		} else if spec, ok := n.(*ast.ValueSpec); ok {
			d.analyzeValueSpec(pkg, spec, tainted)
		}
		// Always descend: nested nodes are analyzed as well.
		return true
	}
	ast.Inspect(file, visit)
}
// analyzeAssignment updates the taint table for one assignment statement.
//
// Left-hand identifiers are paired with the right-hand expression at the same
// index. The identifier becomes tainted when that expression is a call to a
// known taint source, or when it is an identifier that is already tainted.
// Left-hand sides without a matching right-hand expression are ignored, and
// propagations is not used.
func (d *DataFlowAnalyzer) analyzeAssignment(pkg *packages.Package, node *ast.AssignStmt, tainted map[string]TaintSource, propagations map[string][]string) {
	for i, lhs := range node.Lhs {
		target, isIdent := lhs.(*ast.Ident)
		if !isIdent || i >= len(node.Rhs) {
			continue
		}
		rhs := node.Rhs[i]

		if source := d.getTaintSource(pkg, rhs); source != nil {
			tainted[target.Name] = *source
		}

		from, isIdent := rhs.(*ast.Ident)
		if !isIdent {
			continue
		}
		if inherited, exists := tainted[from.Name]; exists {
			tainted[target.Name] = inherited
		}
	}
}
// analyzeCall inspects one call expression.
//
// When the callee is a listed dangerous sink, every argument that is a plain
// identifier present in the taint table produces a "taint-flow" finding.
// Independently of that, each argument is passed to the SQL injection,
// command injection and path traversal checks. file is not used.
func (d *DataFlowAnalyzer) analyzeCall(pkg *packages.Package, node *ast.CallExpr, tainted map[string]TaintSource, file *ast.File) {
	fnName := d.getCallName(node)

	if d.isDangerousSink(fnName) {
		for _, arg := range node.Args {
			ident, isIdent := arg.(*ast.Ident)
			if !isIdent {
				continue
			}
			source, isTainted := tainted[ident.Name]
			if !isTainted {
				continue
			}

			pos := d.fset.Position(node.Pos())
			finding := quality.Finding{
				ID:          fmt.Sprintf("taint-flow::%s::%d", pos.Filename, pos.Line),
				Type:        "security",
				Title:       fmt.Sprintf("Tainted data flows to dangerous sink: %s", fnName),
				Description: fmt.Sprintf("User-controlled input from %s flows to %s without sanitization. This may lead to injection vulnerabilities.", source.Description, fnName),
				File:        pos.Filename,
				Line:        pos.Line,
				Severity:    quality.SeverityT4,
				Score:       8,
				Status:      quality.StatusOpen,
				Metadata: map[string]string{
					"source":      source.Name,
					"source_type": source.Category,
					"sink":        fnName,
					"variable":    ident.Name,
				},
			}
			d.findings = append(d.findings, finding)
		}
	}

	for i := range node.Args {
		arg := node.Args[i]
		d.checkSQLInjection(pkg, arg, tainted, node)
		d.checkCommandInjection(pkg, arg, tainted, node)
		d.checkPathTraversal(pkg, arg, tainted, node)
	}
}
// getTaintSource reports whether expr is a call to a registered taint source
// and, if so, returns a copy of the matching table entry. It returns nil for
// anything that is not a selector call (x.F(...)) or that matches no entry.
//
// The callee is resolved in three steps, first match wins:
//
//  1. the qualifier's spelling is used directly as the table key
//     (e.g. os.Getenv -> "os");
//  2. if the qualifier is an imported package name, its import path is used,
//     so renamed imports and multi-segment paths resolve;
//  3. otherwise the selected function or method is looked up by the package
//     that declares it. This is what lets method calls such as
//     r.FormValue(...) on an *http.Request, or reader.ReadString(...) on a
//     *bufio.Reader, match, where the receiver is a variable rather than a
//     package.
//
// Matching is by name within a package, so any function or method of that
// package carrying a listed name is treated as a source.
func (d *DataFlowAnalyzer) getTaintSource(pkg *packages.Package, expr ast.Expr) *TaintSource {
	call, ok := expr.(*ast.CallExpr)
	if !ok {
		return nil
	}
	sel, ok := call.Fun.(*ast.SelectorExpr)
	if !ok {
		return nil
	}
	fnName := sel.Sel.Name

	// lookup returns a copy so callers cannot modify the shared table.
	lookup := func(key string) *TaintSource {
		sources := d.taintSrcs[key]
		for i := range sources {
			if sources[i].Name == fnName {
				src := sources[i]
				return &src
			}
		}
		return nil
	}

	qualifier, hasQualifier := sel.X.(*ast.Ident)
	if hasQualifier {
		if src := lookup(qualifier.Name); src != nil {
			return src
		}
	}

	// Everything below needs type information.
	if pkg == nil || pkg.TypesInfo == nil {
		return nil
	}

	if hasQualifier {
		if pkgName, isPkg := pkg.TypesInfo.Uses[qualifier].(*types.PkgName); isPkg {
			if src := lookup(pkgName.Imported().Path()); src != nil {
				return src
			}
		}
	}

	// Resolve the selected name itself; for a method call this yields the
	// method object, whose package is where the receiver type is declared.
	if fn, isFunc := pkg.TypesInfo.Uses[sel.Sel].(*types.Func); isFunc && fn.Pkg() != nil {
		return lookup(fn.Pkg().Path())
	}
	return nil
}
// getCallName renders the callee of a call expression as text: "name" for a
// plain identifier, "x.Sel" when the selector's operand is an identifier,
// just "Sel" for any other selector, and "" for every other callee form.
func (d *DataFlowAnalyzer) getCallName(node *ast.CallExpr) string {
	if plain, ok := node.Fun.(*ast.Ident); ok {
		return plain.Name
	}

	sel, ok := node.Fun.(*ast.SelectorExpr)
	if !ok {
		return ""
	}
	if operand, ok := sel.X.(*ast.Ident); ok {
		return operand.Name + "." + sel.Sel.Name
	}
	return sel.Sel.Name
}
// isDangerousSink reports whether fnName, in the textual form produced by
// getCallName, is one of the calls treated as a dangerous sink.
func (d *DataFlowAnalyzer) isDangerousSink(fnName string) bool {
	switch fnName {
	case "exec.Command",
		"exec.CommandContext",
		"os/exec.Command",
		"db.Exec",
		"db.Query",
		"db.QueryRow",
		"sql.DB.Exec",
		"sql.DB.Query",
		"os.WriteFile",
		"os.Create",
		"os.OpenFile",
		"ioutil.WriteFile",
		"template.Parse",
		"html.template.Parse",
		"fmt.Fprintf",
		"fmt.Printf",
		"fmt.Sprintf":
		return true
	default:
		return false
	}
}
// checkSQLInjection reports a call whose name contains "Exec" or "Query" when
// arg is a string literal holding a %s or %v formatting verb, which suggests
// the query text is assembled with string formatting.
//
// Only string literals are examined. A "+" inside a literal (as in
// "SELECT a + b") is ordinary SQL and is not reported, and numeric or rune
// literals such as 1e+5 or '+' are ignored. pkg and tainted are not used.
func (d *DataFlowAnalyzer) checkSQLInjection(pkg *packages.Package, arg ast.Expr, tainted map[string]TaintSource, node *ast.CallExpr) {
	fnName := d.getCallName(node)
	if !strings.Contains(fnName, "Exec") && !strings.Contains(fnName, "Query") {
		return
	}

	lit, ok := arg.(*ast.BasicLit)
	if !ok || lit.Kind != token.STRING {
		return
	}

	// Strip the surrounding quote characters before looking at the text.
	query := strings.Trim(lit.Value, "`\"")
	if !strings.Contains(query, "%s") && !strings.Contains(query, "%v") {
		return
	}

	pos := d.fset.Position(node.Pos())
	d.findings = append(d.findings, quality.Finding{
		ID:          fmt.Sprintf("sql-injection::%s::%d", pos.Filename, pos.Line),
		Type:        "security",
		Title:       "Potential SQL injection vulnerability",
		Description: "SQL query constructed with string formatting. Use parameterized queries instead.",
		File:        pos.Filename,
		Line:        pos.Line,
		Severity:    quality.SeverityT4,
		Score:       10,
		Status:      quality.StatusOpen,
		Metadata: map[string]string{
			"vulnerability": "sql-injection",
			"pattern":       "string-formatting-in-query",
		},
	})
}
// checkCommandInjection reports a call whose name contains "exec.Command"
// when arg is a plain identifier present in the taint table. pkg is not used.
func (d *DataFlowAnalyzer) checkCommandInjection(pkg *packages.Package, arg ast.Expr, tainted map[string]TaintSource, node *ast.CallExpr) {
	if callee := d.getCallName(node); !strings.Contains(callee, "exec.Command") {
		return
	}

	ident, isIdent := arg.(*ast.Ident)
	if !isIdent {
		return
	}
	if _, isTainted := tainted[ident.Name]; !isTainted {
		return
	}

	pos := d.fset.Position(node.Pos())
	finding := quality.Finding{
		ID:          fmt.Sprintf("command-injection::%s::%d", pos.Filename, pos.Line),
		Type:        "security",
		Title:       "Potential command injection vulnerability",
		Description: "User-controlled input flows to exec.Command. Sanitize or validate input before use.",
		File:        pos.Filename,
		Line:        pos.Line,
		Severity:    quality.SeverityT4,
		Score:       10,
		Status:      quality.StatusOpen,
		Metadata: map[string]string{
			"vulnerability": "command-injection",
			"variable":      ident.Name,
		},
	}
	d.findings = append(d.findings, finding)
}
// checkPathTraversal reports a call to one of the listed file and path
// functions when arg is a plain identifier present in the taint table. pkg is
// not used.
func (d *DataFlowAnalyzer) checkPathTraversal(pkg *packages.Package, arg ast.Expr, tainted map[string]TaintSource, node *ast.CallExpr) {
	switch d.getCallName(node) {
	case "os.Open",
		"os.OpenFile",
		"os.Create",
		"os.WriteFile",
		"os.ReadFile",
		"ioutil.ReadFile",
		"ioutil.WriteFile",
		"filepath.Join",
		"filepath.Walk":
		// A path-handling call: fall out of the switch and inspect arg.
	default:
		return
	}

	ident, isIdent := arg.(*ast.Ident)
	if !isIdent {
		return
	}
	if _, isTainted := tainted[ident.Name]; !isTainted {
		return
	}

	pos := d.fset.Position(node.Pos())
	finding := quality.Finding{
		ID:          fmt.Sprintf("path-traversal::%s::%d", pos.Filename, pos.Line),
		Type:        "security",
		Title:       "Potential path traversal vulnerability",
		Description: "User-controlled input used in file path operation. Validate and sanitize paths.",
		File:        pos.Filename,
		Line:        pos.Line,
		Severity:    quality.SeverityT4,
		Score:       8,
		Status:      quality.StatusOpen,
		Metadata: map[string]string{
			"vulnerability": "path-traversal",
			"variable":      ident.Name,
		},
	}
	d.findings = append(d.findings, finding)
}
// analyzeValueSpec marks each declared name as tainted when the initializer
// at the same index is a call to a known taint source. Names without a
// matching initializer are left alone.
func (d *DataFlowAnalyzer) analyzeValueSpec(pkg *packages.Package, node *ast.ValueSpec, tainted map[string]TaintSource) {
	for i, value := range node.Values {
		if i >= len(node.Names) {
			break
		}
		source := d.getTaintSource(pkg, value)
		if source == nil {
			continue
		}
		tainted[node.Names[i].Name] = *source
	}
}
// SecretsDetector scans file contents for text matching a fixed list of
// secret patterns.
type SecretsDetector struct {
// patterns is the list of regular expressions tried against every file.
patterns []SecretPattern
}
// SecretPattern pairs a regular expression with the label and severity used
// for the findings it produces.
type SecretPattern struct {
// Name is the human-readable secret type, used in finding titles and IDs.
Name string
// Pattern is the compiled expression matched against file contents.
Pattern *regexp.Regexp
// Severity is copied onto every finding produced by this pattern.
Severity quality.Severity
}
// NewSecretsDetector returns a detector loaded with the built-in secret
// patterns. Every pattern is compiled with regexp.MustCompile, so an invalid
// expression panics at construction time.
func NewSecretsDetector() *SecretsDetector {
	secret := func(name, expr string, severity quality.Severity) SecretPattern {
		return SecretPattern{Name: name, Pattern: regexp.MustCompile(expr), Severity: severity}
	}

	return &SecretsDetector{
		patterns: []SecretPattern{
			secret("AWS Access Key", `AKIA[0-9A-Z]{16}`, quality.SeverityT4),
			secret("AWS Secret Key", `(?i)aws(.{0,20})?['\"][0-9a-zA-Z/+=]{40}['\"]`, quality.SeverityT4),
			secret("GitHub Token", `ghp_[0-9a-zA-Z]{36}`, quality.SeverityT4),
			secret("GitHub OAuth", `gho_[0-9a-zA-Z]{36}`, quality.SeverityT4),
			secret("GitHub App Token", `(ghu|ghs)_[0-9a-zA-Z]{36}`, quality.SeverityT4),
			secret("Slack Token", `xox[baprs]-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9]{24}`, quality.SeverityT4),
			secret("RSA Private Key", `-----BEGIN RSA PRIVATE KEY-----`, quality.SeverityT4),
			secret("Private Key", `-----BEGIN PRIVATE KEY-----`, quality.SeverityT4),
			secret("JWT", `eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*`, quality.SeverityT3),
			secret("Generic API Key", `(?i)(api_key|apikey|secret|password|token)\s*[=:]\s*['"][^'"]{8,}['"]`, quality.SeverityT3),
			secret("DB Connection String", `(?i)(mysql|postgres|mongodb)://[^:]+:[^@]+@[^/]+`, quality.SeverityT4),
		},
	}
}
// Name returns the detector's identifier, "secrets".
func (d *SecretsDetector) Name() string {
return "secrets"
}
// Severity returns the detector-level severity, quality.SeverityT4. Each
// finding carries the severity of the pattern that produced it instead.
func (d *SecretsDetector) Severity() quality.Severity {
return quality.SeverityT4
}
// Detect walks the tree rooted at path and returns one finding for every
// match of every secret pattern in the files it scans.
//
// Only files with one of the extensions .go, .ts, .js, .py, .java, .yaml,
// .yml, .json, .env, or with no extension at all, are read. Files whose path
// contains "_test.go", "vendor/" or "node_modules/" are skipped, and
// directories named vendor or node_modules are pruned so their contents are
// never visited. Entries that cannot be walked or read are skipped silently
// (best effort). config is not consulted.
//
// The walk stops and returns ctx's error once ctx is cancelled; a nil ctx
// disables that check.
func (d *SecretsDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	var findings []quality.Finding

	err := filepath.Walk(path, func(filePath string, info os.FileInfo, walkErr error) error {
		if ctx != nil {
			if ctxErr := ctx.Err(); ctxErr != nil {
				return ctxErr
			}
		}
		if walkErr != nil {
			return nil
		}
		if info.IsDir() {
			// Prune dependency trees instead of visiting each file only
			// to discard it afterwards.
			switch info.Name() {
			case "vendor", "node_modules":
				return filepath.SkipDir
			}
			return nil
		}

		switch filepath.Ext(filePath) {
		case ".go", ".ts", ".js", ".py", ".java", ".yaml", ".yml", ".json", ".env", "":
		default:
			return nil
		}

		// Compare against a slash-separated copy so the directory filters
		// also work where the OS separator is a backslash.
		slashPath := filepath.ToSlash(filePath)
		if strings.Contains(slashPath, "_test.go") || strings.Contains(slashPath, "vendor/") || strings.Contains(slashPath, "node_modules/") {
			return nil
		}

		data, readErr := os.ReadFile(filePath)
		if readErr != nil {
			return nil
		}
		content := string(data)

		for _, pattern := range d.patterns {
			for _, match := range pattern.Pattern.FindAllStringIndex(content, -1) {
				// 1-based line number of the start of the match.
				line := strings.Count(content[:match[0]], "\n") + 1
				findings = append(findings, quality.Finding{
					ID:          fmt.Sprintf("secret::%s::%d::%s", filePath, line, pattern.Name),
					Type:        "security",
					Title:       fmt.Sprintf("Potential %s detected", pattern.Name),
					Description: fmt.Sprintf("A potential %s was found in source code. Remove it and use environment variables or secret management.", pattern.Name),
					File:        filePath,
					Line:        line,
					Severity:    pattern.Severity,
					Score:       10,
					Status:      quality.StatusOpen,
					Metadata: map[string]string{
						"secret_type": pattern.Name,
					},
				})
			}
		}
		return nil
	})
	if err != nil {
		return nil, err
	}
	return findings, nil
}
+601
View File
@@ -0,0 +1,601 @@
package analyzers
import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
)
// BestPractice describes a single recommendation that can be reported for a
// project.
type BestPractice struct {
	ID          string
	Category    string // security, architecture, performance, quality
	Title       string
	Description string
	Pattern     string
	Language    string
	Framework   string
	Severity    string
	Reference   string
	CodeExample string
}

// PracticesFetcher detects a project's language and frameworks and serves the
// matching best practices, caching each assembled list.
type PracticesFetcher struct {
	// cache holds assembled practice lists; cacheMu guards it.
	cache   map[string][]BestPractice
	cacheMu sync.RWMutex

	docsPath string
	// language is the last language DetectLanguage found a marker for.
	language   string
	frameworks []string
}

// NewPracticesFetcher returns a fetcher with an empty cache.
func NewPracticesFetcher() *PracticesFetcher {
	return &PracticesFetcher{
		cache: make(map[string][]BestPractice),
	}
}

// DetectLanguage guesses the project language from well-known marker files
// directly inside path.
//
// Markers are checked in a fixed order and the first one that exists wins, so
// a directory holding several markers always yields the same answer.
// tsconfig.json is checked before package.json because TypeScript projects
// normally contain both. The detected language is also remembered on the
// fetcher. When no marker is present, "go" is returned and the remembered
// language is left untouched.
func (f *PracticesFetcher) DetectLanguage(path string) string {
	// A slice rather than a map: map iteration order is randomized, which
	// made the result vary between calls for multi-language directories.
	markers := []struct {
		file string
		lang string
	}{
		{"go.mod", "go"},
		{"go.sum", "go"},
		{"tsconfig.json", "typescript"},
		{"package.json", "javascript"},
		{"requirements.txt", "python"},
		{"pyproject.toml", "python"},
		{"setup.py", "python"},
		{"Cargo.toml", "rust"},
		{"pom.xml", "java"},
		{"build.gradle", "java"},
		{"composer.json", "php"},
		{"Gemfile", "ruby"},
	}

	for _, m := range markers {
		if _, err := os.Stat(filepath.Join(path, m.file)); err == nil {
			f.language = m.lang
			return m.lang
		}
	}
	return "go"
}
// DetectFrameworks returns the frameworks recognised for a project of the
// given language rooted at path, and remembers them on the fetcher.
//
//   - "go": a framework is reported when any .go file below path contains its
//     import path.
//   - "typescript"/"javascript": package.json in path is searched for quoted
//     dependency names.
//   - "python": requirements.txt in path is searched case-insensitively.
//
// Unknown languages and unreadable manifest files yield an empty, non-nil
// slice.
func (f *PracticesFetcher) DetectFrameworks(path, language string) []string {
	frameworks := []string{}

	switch language {
	case "go":
		goFrameworks := []struct {
			importPath string
			name       string
		}{
			{"github.com/gin-gonic", "gin"},
			{"github.com/labstack/echo", "echo"},
			{"github.com/gofiber/fiber", "fiber"},
			{"gorm.io", "gorm"},
			{"github.com/spf13/cobra", "cobra"},
			{"k8s.io/client-go", "kubernetes"},
		}
		for _, fw := range goFrameworks {
			if f.hasImport(path, fw.importPath) {
				frameworks = append(frameworks, fw.name)
			}
		}

	case "typescript", "javascript":
		data, err := os.ReadFile(filepath.Join(path, "package.json"))
		if err != nil {
			break
		}
		content := string(data)
		if strings.Contains(content, `"react"`) || strings.Contains(content, `"next"`) {
			frameworks = append(frameworks, "react")
		}
		if strings.Contains(content, `"vue"`) {
			frameworks = append(frameworks, "vue")
		}
		if strings.Contains(content, `"express"`) {
			frameworks = append(frameworks, "express")
		}
		// NestJS ships as scoped packages ("@nestjs/core", "@nestjs/common",
		// ...), so the scope prefix must be matched without a closing quote.
		if strings.Contains(content, `"nestjs"`) || strings.Contains(content, `"@nestjs"`) || strings.Contains(content, `"@nestjs/`) {
			frameworks = append(frameworks, "nestjs")
		}

	case "python":
		data, err := os.ReadFile(filepath.Join(path, "requirements.txt"))
		if err != nil {
			break
		}
		content := strings.ToLower(string(data))
		for _, name := range []string{"django", "flask", "fastapi"} {
			if strings.Contains(content, name) {
				frameworks = append(frameworks, name)
			}
		}
	}

	f.frameworks = frameworks
	return frameworks
}
// hasImport reports whether any .go file below path contains importPath as a
// substring of its contents. Entries that cannot be walked or read are
// skipped.
func (f *PracticesFetcher) hasImport(path, importPath string) bool {
	visit := func(filePath string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return nil
		}
		if info.IsDir() || !strings.HasSuffix(filePath, ".go") {
			return nil
		}
		data, readErr := os.ReadFile(filePath)
		if readErr == nil && strings.Contains(string(data), importPath) {
			// A non-nil error is the only way to make Walk stop early; it
			// doubles as the "found" signal below.
			return fmt.Errorf("found")
		}
		return nil
	}

	return filepath.Walk(path, visit) != nil
}
// FetchPractices returns the practices for language and frameworks, building
// the list on the first request and serving it from the cache afterwards.
//
// The cache key is the language followed by the frameworks joined in the
// order given. The returned error is always nil and ctx is not used.
func (f *PracticesFetcher) FetchPractices(ctx context.Context, language string, frameworks []string) ([]BestPractice, error) {
	key := language + ":" + strings.Join(frameworks, ",")

	f.cacheMu.RLock()
	cached, hit := f.cache[key]
	f.cacheMu.RUnlock()
	if hit {
		return cached, nil
	}

	built := f.getBuiltInPractices(language, frameworks)

	f.cacheMu.Lock()
	f.cache[key] = built
	f.cacheMu.Unlock()

	return built, nil
}
// getBuiltInPractices concatenates, in this order, the language practices,
// the practices of each framework, and the security, architecture and
// performance practices.
func (f *PracticesFetcher) getBuiltInPractices(language string, frameworks []string) []BestPractice {
	var all []BestPractice

	all = append(all, f.getLanguagePractices(language)...)
	for i := range frameworks {
		all = append(all, f.getFrameworkPractices(frameworks[i])...)
	}

	crossCutting := [][]BestPractice{
		f.getSecurityPractices(language),
		f.getArchitecturePractices(),
		f.getPerformancePractices(language),
	}
	for _, group := range crossCutting {
		all = append(all, group...)
	}
	return all
}
// getLanguagePractices returns the practices specific to lang. "typescript"
// and "javascript" share one list; any other unlisted language yields nil.
func (f *PracticesFetcher) getLanguagePractices(lang string) []BestPractice {
	switch lang {
	case "go":
		return []BestPractice{
			{
				ID:          "go:error-handling",
				Category:    "quality",
				Title:       "Always handle errors explicitly",
				Description: "Never ignore errors. Each error should be handled, wrapped with context, or explicitly logged.",
				Pattern:     `if err != nil`,
				Language:    "go",
				Severity:    "high",
				Reference:   "https://go.dev/blog/error-handling-and-go",
			},
			{
				ID:          "go:defer-in-loop",
				Category:    "performance",
				Title:       "Avoid defer in loops",
				Description: "defer in loops causes resources to be held until function returns. Move loop body to a separate function.",
				Pattern:     `for.*\{[\s\S]*defer`,
				Language:    "go",
				Severity:    "medium",
			},
			{
				ID:          "go:context-first",
				Category:    "architecture",
				Title:       "context.Context should be first parameter",
				Description: "Functions that use context should accept it as the first parameter.",
				Pattern:     `func\s+\w+\([^)]*context\.Context`,
				Language:    "go",
				Severity:    "low",
			},
			{
				ID:          "go:interface-location",
				Category:    "architecture",
				Title:       "Define interfaces where they are used",
				Description: "Interfaces should be defined by the consumer, not the implementer. This promotes loose coupling.",
				Language:    "go",
				Severity:    "medium",
			},
			{
				ID:          "go:exported-comments",
				Category:    "quality",
				Title:       "Exported symbols must have documentation comments",
				Description: "All exported functions, types, and variables should have doc comments starting with their name.",
				Language:    "go",
				Severity:    "low",
				Reference:   "https://go.dev/doc/comment",
			},
			{
				ID:          "go:receiver-type",
				Category:    "architecture",
				Title:       "Use pointer receivers consistently",
				Description: "If any method has a pointer receiver, all methods should have pointer receivers. Use value receivers for small immutable types.",
				Language:    "go",
				Severity:    "low",
			},
			{
				ID:          "go:goroutine-leak",
				Category:    "performance",
				Title:       "Goroutines must have a termination path",
				Description: "Every goroutine should have a clear termination condition, typically via context cancellation or a done channel.",
				Language:    "go",
				Severity:    "high",
			},
		}

	case "typescript", "javascript":
		return []BestPractice{
			{
				ID:          "ts:async-await",
				Category:    "quality",
				Title:       "Prefer async/await over raw Promises",
				Description: "async/await provides better readability and error handling than .then() chains.",
				Language:    "typescript",
				Severity:    "low",
			},
			{
				ID:          "ts:any-type",
				Category:    "quality",
				Title:       "Avoid the any type",
				Description: "Use specific types or unknown instead of any to maintain type safety.",
				Pattern:     `:\s*any\b`,
				Language:    "typescript",
				Severity:    "medium",
			},
			{
				ID:          "ts:null-check",
				Category:    "quality",
				Title:       "Use strict null checks",
				Description: "Enable strictNullChecks in tsconfig.json and handle null/undefined explicitly.",
				Language:    "typescript",
				Severity:    "medium",
			},
		}

	case "python":
		return []BestPractice{
			{
				ID:          "py:type-hints",
				Category:    "quality",
				Title:       "Use type hints for function signatures",
				Description: "Add type annotations to function parameters and return values for better documentation and tooling.",
				Language:    "python",
				Severity:    "low",
			},
			{
				ID:          "py:context-manager",
				Category:    "quality",
				Title:       "Use context managers for resource handling",
				Description: "Always use 'with' statements for files, connections, and other resources.",
				Pattern:     `with\s+\w+`,
				Language:    "python",
				Severity:    "medium",
			},
		}

	default:
		return nil
	}
}
// getFrameworkPractices returns the practices attached to a framework family:
// web servers (gin, echo, fiber, express), front-end libraries (react, vue)
// and Python web frameworks (django, fastapi, flask). Every entry records the
// concrete framework it was requested for. Unknown frameworks yield nil.
func (f *PracticesFetcher) getFrameworkPractices(framework string) []BestPractice {
	const category = "security"

	switch framework {
	case "gin", "echo", "fiber", "express":
		return []BestPractice{
			{
				ID:          "web:input-validation",
				Category:    category,
				Title:       "Validate all user input",
				Description: "Never trust user input. Validate and sanitize all request parameters, body, and headers.",
				Severity:    "critical",
				Framework:   framework,
			},
			{
				ID:          "web:error-exposure",
				Category:    category,
				Title:       "Don't expose internal errors to users",
				Description: "Log detailed errors internally but return generic error messages to users.",
				Severity:    "high",
				Framework:   framework,
			},
			{
				ID:          "web:rate-limiting",
				Category:    category,
				Title:       "Implement rate limiting",
				Description: "Protect endpoints with rate limiting to prevent abuse and DoS attacks.",
				Severity:    "high",
				Framework:   framework,
			},
			{
				ID:          "web:security-headers",
				Category:    category,
				Title:       "Set security headers",
				Description: "Include X-Content-Type-Options, X-Frame-Options, Content-Security-Policy headers.",
				Severity:    "medium",
				Framework:   framework,
			},
		}

	case "react", "vue":
		return []BestPractice{
			{
				ID:          "frontend:xss-prevention",
				Category:    category,
				Title:       "Prevent XSS vulnerabilities",
				Description: "Never use dangerouslySetInnerHTML/v-html with user content. Sanitize all user input.",
				Severity:    "critical",
				Framework:   framework,
			},
			{
				ID:          "frontend:dependency-audit",
				Category:    category,
				Title:       "Audit dependencies regularly",
				Description: "Run npm audit or yarn audit regularly and update vulnerable packages.",
				Severity:    "high",
				Framework:   framework,
			},
		}

	case "django", "fastapi", "flask":
		return []BestPractice{
			{
				ID:          "django:sql-injection",
				Category:    category,
				Title:       "Use ORM to prevent SQL injection",
				Description: "Never use raw string formatting in SQL queries. Always use parameterized queries or ORM methods.",
				Severity:    "critical",
				Framework:   framework,
			},
			{
				ID:          "django:csrf-protection",
				Category:    category,
				Title:       "Enable CSRF protection",
				Description: "Ensure CSRF middleware is enabled for all state-changing operations.",
				Severity:    "high",
				Framework:   framework,
			},
		}

	default:
		return nil
	}
}
// getSecurityPractices returns the language-independent security practices.
// lang is accepted but does not influence the result.
func (f *PracticesFetcher) getSecurityPractices(lang string) []BestPractice {
	const category = "security"

	practices := []BestPractice{
		{
			ID:          "sec:hardcoded-secrets",
			Category:    category,
			Title:       "No hardcoded secrets",
			Description: "Never commit secrets, API keys, passwords, or tokens in source code. Use environment variables or secret management.",
			Pattern:     `(password|secret|api_key|apikey|token)\s*[=:]\s*['"][^'"]+['"]`,
			Severity:    "critical",
			Reference:   "https://owasp.org/www-project-web-security-testing-guide/",
		},
		{
			ID:          "sec:sql-injection",
			Category:    category,
			Title:       "Prevent SQL injection",
			Description: "Use parameterized queries or prepared statements. Never concatenate user input into SQL strings.",
			Severity:    "critical",
			Reference:   "https://owasp.org/www-community/attacks/SQL_Injection",
		},
		{
			ID:          "sec:xss-prevention",
			Category:    category,
			Title:       "Prevent Cross-Site Scripting (XSS)",
			Description: "Encode output, validate input, use Content-Security-Policy headers.",
			Severity:    "critical",
			Reference:   "https://owasp.org/www-community/attacks/xss/",
		},
		{
			ID:          "sec:insecure-deserialization",
			Category:    category,
			Title:       "Avoid insecure deserialization",
			Description: "Don't deserialize untrusted data. Validate and sanitize all serialized input.",
			Severity:    "critical",
			Reference:   "https://owasp.org/www-community/vulnerabilities/Deserialization_of_untrusted_data",
		},
		{
			ID:          "sec:weak-crypto",
			Category:    category,
			Title:       "Use strong cryptography",
			Description: "Use modern algorithms (AES-256-GCM, SHA-256+, RSA-2048+). Never use MD5, SHA1 for security purposes.",
			Pattern:     `(md5|sha1)\s*\(`,
			Severity:    "high",
		},
		{
			ID:          "sec:logging-sensitive",
			Category:    category,
			Title:       "Don't log sensitive data",
			Description: "Never log passwords, tokens, credit cards, or PII. Mask or redact sensitive fields.",
			Severity:    "high",
		},
		{
			ID:          "sec:auth-checks",
			Category:    category,
			Title:       "Implement proper authentication checks",
			Description: "Verify authentication on every protected endpoint. Don't rely on client-side checks.",
			Severity:    "critical",
		},
		{
			ID:          "sec:input-validation",
			Category:    category,
			Title:       "Validate all input on the server",
			Description: "Client-side validation is for UX. Server-side validation is for security.",
			Severity:    "critical",
		},
	}
	return practices
}
// getArchitecturePractices returns the language-independent architecture
// practices.
func (f *PracticesFetcher) getArchitecturePractices() []BestPractice {
	const category = "architecture"

	practices := []BestPractice{
		{
			ID:          "arch:single-responsibility",
			Category:    category,
			Title:       "Single Responsibility Principle",
			Description: "Each module/class should have one reason to change. Split large modules into focused ones.",
			Severity:    "medium",
		},
		{
			ID:          "arch:dependency-injection",
			Category:    category,
			Title:       "Use dependency injection",
			Description: "Inject dependencies rather than creating them internally. This improves testability and flexibility.",
			Severity:    "medium",
		},
		{
			ID:          "arch:layer-separation",
			Category:    category,
			Title:       "Separate concerns by layer",
			Description: "Keep presentation, business logic, and data access layers separate.",
			Severity:    "medium",
		},
		{
			ID:          "arch:interface-segregation",
			Category:    category,
			Title:       "Prefer small, focused interfaces",
			Description: "Clients shouldn't depend on methods they don't use. Split large interfaces.",
			Severity:    "low",
		},
		{
			ID:          "arch:avoid-god-classes",
			Category:    category,
			Title:       "Avoid god classes/modules",
			Description: "Classes with too many responsibilities should be split. Watch for high method/field counts.",
			Severity:    "medium",
		},
		{
			ID:          "arch:circular-dependencies",
			Category:    category,
			Title:       "Eliminate circular dependencies",
			Description: "Circular dependencies indicate tight coupling. Refactor to use dependency inversion.",
			Severity:    "high",
		},
	}
	return practices
}
// getPerformancePractices returns the general performance practices, followed
// by three Go-specific ones when lang is "go".
func (f *PracticesFetcher) getPerformancePractices(lang string) []BestPractice {
	const category = "performance"

	general := []BestPractice{
		{
			ID:          "perf:n-plus-one",
			Category:    category,
			Title:       "Avoid N+1 query patterns",
			Description: "When iterating over results, avoid making separate queries for each item. Use JOINs or batch loading.",
			Severity:    "high",
		},
		{
			ID:          "perf:unbounded-results",
			Category:    category,
			Title:       "Limit query results",
			Description: "Always paginate or limit query results to prevent memory exhaustion.",
			Severity:    "medium",
		},
		{
			ID:          "perf:connection-pooling",
			Category:    category,
			Title:       "Use connection pooling",
			Description: "Don't create new connections per request. Use connection pools for databases and HTTP clients.",
			Severity:    "high",
		},
		{
			ID:          "perf:caching",
			Category:    category,
			Title:       "Cache expensive operations",
			Description: "Cache frequently accessed, rarely changing data. Consider memoization for expensive computations.",
			Severity:    "medium",
		},
		{
			ID:          "perf:blocking-in-hot-path",
			Category:    category,
			Title:       "Avoid blocking operations in hot paths",
			Description: "Move I/O, network calls, and heavy computations out of request handlers when possible.",
			Severity:    "medium",
		},
	}
	if lang != "go" {
		return general
	}

	goSpecific := []BestPractice{
		{
			ID:          "go:perf:string-concat",
			Category:    category,
			Title:       "Use strings.Builder for string concatenation",
			Description: "In loops, use strings.Builder instead of += for efficient string concatenation.",
			// The backtick cannot appear inside a raw string, hence the splice.
			Pattern:  `for[\s\S]*\+=.*["` + "`" + `]`,
			Language: "go",
			Severity: "medium",
		},
		{
			ID:          "go:perf:slice-prealloc",
			Category:    category,
			Title:       "Pre-allocate slices when size is known",
			Description: "Use make([]T, 0, capacity) when you know the final size to avoid reallocations.",
			Language:    "go",
			Severity:    "low",
		},
		{
			ID:          "go:perf:json-marshal",
			Category:    category,
			Title:       "Consider streaming JSON for large payloads",
			Description: "For large JSON, use json.Encoder/Decoder instead of Marshal/Unmarshal to reduce allocations.",
			Language:    "go",
			Severity:    "low",
		},
	}
	return append(general, goSpecific...)
}
// GetPracticesByCategory returns every cached practice whose Category equals
// category, across all cache entries. A practice held under several cache
// keys appears once per key, and the order follows map iteration. The result
// is nil when nothing matches.
func (f *PracticesFetcher) GetPracticesByCategory(category string) []BestPractice {
	f.cacheMu.RLock()
	defer f.cacheMu.RUnlock()

	var matches []BestPractice
	for _, list := range f.cache {
		for i := range list {
			if list[i].Category != category {
				continue
			}
			matches = append(matches, list[i])
		}
	}
	return matches
}
// GetAllPractices returns every cached practice once, keeping the first
// occurrence of each ID. The order follows map iteration, and the result is
// nil when the cache is empty.
func (f *PracticesFetcher) GetAllPractices() []BestPractice {
	f.cacheMu.RLock()
	defer f.cacheMu.RUnlock()

	var unique []BestPractice
	emitted := map[string]bool{}
	for _, list := range f.cache {
		for i := range list {
			id := list[i].ID
			if emitted[id] {
				continue
			}
			emitted[id] = true
			unique = append(unique, list[i])
		}
	}
	return unique
}
+97
View File
@@ -0,0 +1,97 @@
package quality
import (
"context"
"path/filepath"
)
// Detector is the contract every code quality detector implements.
type Detector interface {
// Name returns the detector's name.
Name() string
// Detect runs the detector on path and returns the findings it produced,
// or an error.
Detect(ctx context.Context, path string, config *Config) ([]Finding, error)
// Severity returns the default severity for findings from this detector.
Severity() Severity
}
// LanguageDetector extends Detector with language-specific extraction
// methods.
type LanguageDetector interface {
Detector
// SupportedLanguages returns the languages this detector supports.
SupportedLanguages() []string
// ExtractFunctions extracts function information from the given source files.
ExtractFunctions(ctx context.Context, files []string) ([]FunctionInfo, error)
// ExtractClasses extracts class information from the given source files.
ExtractClasses(ctx context.Context, files []string) ([]ClassInfo, error)
}
// FileFinder locates the source files of a given language.
type FileFinder interface {
// FindFiles returns the source files for the given path and language.
FindFiles(path string, language string) ([]string, error)
// IsSourceFile reports whether path is a source file for the language.
IsSourceFile(path string, language string) bool
}
// BaseDetector holds the name, default severity and file finder shared by
// detectors, and implements the matching accessor methods.
type BaseDetector struct {
// name is the value returned by Name.
name string
// severity is the value returned by Severity.
severity Severity
// finder backs FindFiles; it may be nil.
finder FileFinder
}
// NewBaseDetector returns a BaseDetector carrying the given name, default
// severity and file finder. finder may be nil.
func NewBaseDetector(name string, severity Severity, finder FileFinder) *BaseDetector {
	base := new(BaseDetector)
	base.name = name
	base.severity = severity
	base.finder = finder
	return base
}
// Name returns the name the detector was constructed with.
func (d *BaseDetector) Name() string {
return d.name
}
// Severity returns the default severity the detector was constructed with.
func (d *BaseDetector) Severity() Severity {
return d.severity
}
// FindFiles delegates to the configured file finder. Without a finder it
// returns a nil slice and a nil error.
func (d *BaseDetector) FindFiles(path string, language string) ([]string, error) {
	if d.finder == nil {
		return nil, nil
	}
	return d.finder.FindFiles(path, language)
}
// ShouldExclude reports whether path matches any of the exclude patterns.
//
// Each pattern is tried, using filepath.Match, against the whole path and
// then against its final element only. Malformed patterns never match. An
// empty exclude list excludes nothing.
func ShouldExclude(path string, excludes []string) bool {
	candidates := [2]string{path, filepath.Base(path)}

	for i := range excludes {
		for _, candidate := range candidates {
			if ok, err := filepath.Match(excludes[i], candidate); err == nil && ok {
				return true
			}
		}
	}
	return false
}
+212
View File
@@ -0,0 +1,212 @@
package detectors
import (
"context"
"fmt"
"log"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"github.com/yourorg/devour/internal/quality"
)
// ComplexityDetector detects complexity issues in source code by matching a
// list of regular-expression signals.
type ComplexityDetector struct {
// BaseDetector supplies the shared name, severity and file finder.
*quality.BaseDetector
// signals is the list of patterns filled in by the constructor.
signals []ComplexitySignal
}
// ComplexitySignal represents a complexity pattern to detect.
//
// analyzeFile obtains a count for the signal from Compute when it is set, and
// otherwise from the number of Pattern matches in the file content. The signal
// contributes to the file score only when that count exceeds Threshold; the
// contribution is the excess over Threshold multiplied by Weight.
type ComplexitySignal struct {
Name string
Pattern *regexp.Regexp
Weight int
Threshold int
// Compute, when non-nil, takes precedence over Pattern. It receives the file
// content and its lines and returns a count plus the label to report.
Compute func(content string, lines []string) (int, string)
}
// NewComplexityDetector creates a complexity detector that uses finder for
// file discovery. It registers the language-neutral signals below and then
// appends the Go-specific ones via addGoSignals.
//
// The patterns are matched against the whole file content, so every anchored
// pattern carries the (?m) flag: without it ^ and $ only match at the very
// start and end of the file and the signal would never fire.
func NewComplexityDetector(finder quality.FileFinder) *ComplexityDetector {
	detector := &ComplexityDetector{
		BaseDetector: quality.NewBaseDetector("complexity", quality.SeverityT2, finder),
		signals: []ComplexitySignal{
			{
				Name:      "nested if statements",
				Pattern:   regexp.MustCompile(`(?m)^\s*if\s+.*\{\s*$`),
				Weight:    2,
				Threshold: 3,
			},
			{
				Name:      "nested for loops",
				Pattern:   regexp.MustCompile(`(?m)^\s*for\s+.*\{\s*$`),
				Weight:    3,
				Threshold: 2,
			},
			{
				Name:      "switch statements",
				Pattern:   regexp.MustCompile(`(?m)^\s*switch\s+.*\{\s*$`),
				Weight:    1,
				Threshold: 5,
			},
			{
				Name:      "function calls",
				Pattern:   regexp.MustCompile(`\w+\(`),
				Weight:    1,
				Threshold: 20,
			},
		},
	}

	// Add Go-specific complexity signals
	detector.addGoSignals()

	return detector
}
// addGoSignals appends the Go-specific complexity signals to d.signals.
//
// The patterns are matched against the whole file content. The line-anchored
// ones therefore use (?m) so that ^ matches at the start of every line rather
// than only at the start of the file. The goroutine pattern starts with \b so
// that identifiers merely ending in "go" are not counted.
func (d *ComplexityDetector) addGoSignals() {
	goSignals := []ComplexitySignal{
		{
			Name:      "goroutines",
			Pattern:   regexp.MustCompile(`\bgo\s+\w+\(`),
			Weight:    2,
			Threshold: 3,
		},
		{
			Name:      "channels",
			Pattern:   regexp.MustCompile(`make\s*\(\s*chan`),
			Weight:    2,
			Threshold: 3,
		},
		{
			Name:      "select statements",
			Pattern:   regexp.MustCompile(`(?m)^\s*select\s*\{`),
			Weight:    3,
			Threshold: 2,
		},
		{
			Name:      "defer statements",
			Pattern:   regexp.MustCompile(`(?m)^\s*defer\s+`),
			Weight:    1,
			Threshold: 5,
		},
	}
	d.signals = append(d.signals, goSignals...)
}
// Name reports the fixed identifier of this detector, "complexity".
func (d *ComplexityDetector) Name() string { return "complexity" }
// Severity reports the fixed default severity of this detector, SeverityT2.
func (d *ComplexityDetector) Severity() quality.Severity { return quality.SeverityT2 }
// Detect scans the files found under path for config.Language and returns the
// complexity findings of every file that is not excluded by config.Exclude.
// A file that cannot be analyzed is logged and skipped; only a failure to
// list the files is returned as an error.
func (d *ComplexityDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	candidates, err := d.FindFiles(path, config.Language)
	if err != nil {
		return nil, fmt.Errorf("failed to find files: %w", err)
	}

	var all []quality.Finding
	for _, candidate := range candidates {
		if quality.ShouldExclude(candidate, config.Exclude) {
			continue
		}
		perFile, analyzeErr := d.analyzeFile(candidate, config)
		if analyzeErr == nil {
			all = append(all, perFile...)
			continue
		}
		log.Printf("Failed to analyze file %s: %v", candidate, analyzeErr)
	}
	return all, nil
}
// analyzeFile scores a single file against every registered signal.
//
// Files with fewer lines than config.MinLOC are ignored. Each signal whose
// count exceeds its threshold adds (count - threshold) * weight to the score
// and contributes its label. At most one finding is returned, and only when
// the score reaches config.Threshold and at least one signal triggered.
func (d *ComplexityDetector) analyzeFile(filePath string, config *quality.Config) ([]quality.Finding, error) {
	absPath, err := filepath.Abs(filePath)
	if err != nil {
		return nil, err
	}
	raw, err := os.ReadFile(absPath)
	if err != nil {
		return nil, err
	}

	text := string(raw)
	lines := strings.Split(text, "\n")
	loc := len(lines)
	if loc < config.MinLOC {
		return nil, nil
	}

	total := 0
	var triggered []string
	for _, sig := range d.signals {
		count, label := 0, ""
		switch {
		case sig.Compute != nil:
			// A custom computation supplies both the count and its label.
			if c, l := sig.Compute(text, lines); c > 0 {
				count, label = c, l
			}
		case sig.Pattern != nil:
			count = len(sig.Pattern.FindAllString(text, -1))
			if count > sig.Threshold {
				label = fmt.Sprintf("%d %s", count, sig.Name)
			}
		}
		if count <= sig.Threshold {
			continue
		}
		triggered = append(triggered, label)
		total += (count - sig.Threshold) * sig.Weight
	}

	if total < config.Threshold || len(triggered) == 0 {
		return nil, nil
	}
	return []quality.Finding{{
		ID:          fmt.Sprintf("complexity-%s-%d", filepath.Base(filePath), total),
		Type:        "complexity",
		Title:       "High complexity detected",
		Description: fmt.Sprintf("File has complexity score of %d with signals: %s", total, strings.Join(triggered, ", ")),
		File:        filePath,
		Line:        1,
		Severity:    d.Severity(),
		Score:       total,
		Status:      quality.StatusOpen,
		Metadata: map[string]string{
			"loc":     strconv.Itoa(loc),
			"signals": strings.Join(triggered, ";"),
		},
	}}, nil
}
+358
View File
@@ -0,0 +1,358 @@
package detectors
import (
"context"
"crypto/sha256"
"fmt"
"log"
"os"
"regexp"
"strings"
"github.com/yourorg/devour/internal/quality"
)
// DuplicationDetector detects duplicate and near-duplicate code. It embeds
// *quality.BaseDetector for naming and file discovery.
type DuplicationDetector struct {
*quality.BaseDetector
// similarityThreshold is the minimum calculateSimilarity result at which
// findDuplicates treats two functions as near-duplicates.
similarityThreshold float64
}
// DuplicateCluster represents a cluster of similar functions as produced by
// findDuplicates. Similarity is 1.0 for clusters grouped by identical body
// hash and the detector's similarity threshold for near-duplicate clusters.
// Representative is the name of the first function in the cluster.
type DuplicateCluster struct {
Functions []quality.FunctionInfo `json:"functions"`
Similarity float64 `json:"similarity"`
Representative string `json:"representative"`
}
// NewDuplicationDetector builds a duplication detector that discovers files
// through finder and uses a near-duplicate similarity threshold of 0.8.
func NewDuplicationDetector(finder quality.FileFinder) *DuplicationDetector {
	base := quality.NewBaseDetector("duplication", quality.SeverityT3, finder)
	return &DuplicationDetector{BaseDetector: base, similarityThreshold: 0.8}
}
// Name reports the fixed identifier of this detector, "duplication".
func (d *DuplicationDetector) Name() string { return "duplication" }
// Severity reports the fixed default severity of this detector, SeverityT3.
func (d *DuplicationDetector) Severity() quality.Severity { return quality.SeverityT3 }
// Detect extracts the functions of every non-excluded file found under path,
// clusters them with findDuplicates and emits one finding per cluster that
// holds at least two functions. Files whose functions cannot be extracted are
// logged and skipped; only a failure to list the files is returned.
func (d *DuplicationDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	sources, err := d.FindFiles(path, config.Language)
	if err != nil {
		return nil, fmt.Errorf("failed to find files: %w", err)
	}

	var collected []quality.FunctionInfo
	for _, src := range sources {
		if quality.ShouldExclude(src, config.Exclude) {
			continue
		}
		fns, extractErr := d.extractFunctions(src)
		if extractErr != nil {
			log.Printf("Failed to extract functions from %s: %v", src, extractErr)
			continue
		}
		collected = append(collected, fns...)
	}

	var findings []quality.Finding
	for idx, cluster := range d.findDuplicates(collected) {
		size := len(cluster.Functions)
		if size < 2 {
			continue
		}
		// The finding is anchored at the first function of the cluster.
		first := cluster.Functions[0]
		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("duplication-cluster-%d", idx),
			Type:        "duplication",
			Title:       "Code duplication detected",
			Description: fmt.Sprintf("Found %d similar functions with %.2f similarity", size, cluster.Similarity),
			File:        first.File,
			Line:        first.Line,
			Severity:    d.Severity(),
			Score:       size * 2, // score grows with the cluster size
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"cluster_size": fmt.Sprintf("%d", size),
				"similarity":   fmt.Sprintf("%.2f", cluster.Similarity),
				"functions":    d.formatFunctionList(cluster.Functions),
			},
		})
	}
	return findings, nil
}
// extractFunctions reads filePath and returns one FunctionInfo for every line
// that, once trimmed, begins with "func " and that parseFunctionLine accepts.
// This is a line-based heuristic for Go sources, not an AST parse.
func (d *DuplicationDetector) extractFunctions(filePath string) ([]quality.FunctionInfo, error) {
	raw, err := os.ReadFile(filePath)
	if err != nil {
		return nil, err
	}
	source := string(raw)

	var found []quality.FunctionInfo
	for idx, rawLine := range strings.Split(source, "\n") {
		decl := strings.TrimSpace(rawLine)
		if !strings.HasPrefix(decl, "func ") {
			continue
		}
		// Line numbers handed to the parser are 1-based.
		if info := d.parseFunctionLine(decl, filePath, idx+1, source); info != nil {
			found = append(found, *info)
		}
	}
	return found, nil
}
// parseFunctionLine builds a FunctionInfo for the declaration on line.
//
// line is the trimmed declaration starting with "func ", lineNum its 1-based
// line number and content the whole file. The name is taken after an optional
// method receiver and stops at the parameter list or a type-parameter list,
// so "func (d *T) Run(x int)" yields "Run" and "func Map[T any](...)" yields
// "Map". It returns nil when the line holds nothing after "func" or when
// findFunctionEnd reports no lines for the body.
func (d *DuplicationDetector) parseFunctionLine(line, filePath string, lineNum int, content string) *quality.FunctionInfo {
	if len(strings.Fields(line)) < 2 {
		return nil
	}

	// Drop the keyword and, for methods, the parenthesised receiver.
	decl := strings.TrimSpace(strings.TrimPrefix(line, "func"))
	if strings.HasPrefix(decl, "(") {
		if closing := strings.Index(decl, ")"); closing != -1 {
			decl = strings.TrimSpace(decl[closing+1:])
		} else {
			// Receiver is not closed on this line; no name can be recovered.
			decl = ""
		}
	}
	funcName := decl
	if idx := strings.IndexAny(decl, "([ \t"); idx != -1 {
		funcName = decl[:idx]
	}

	// Find function body
	lines := strings.Split(content, "\n")
	startLine := lineNum - 1
	endLine := d.findFunctionEnd(lines, startLine)
	if endLine <= startLine {
		return nil
	}

	// Extract function body
	body := strings.Join(lines[startLine:endLine], "\n")
	loc := endLine - startLine

	// Create normalized version for comparison
	normalized := d.normalizeFunction(body)
	bodyHash := d.hashFunction(normalized)

	return &quality.FunctionInfo{
		Name:       funcName,
		File:       filePath,
		Line:       lineNum,
		EndLine:    endLine,
		LOC:        loc,
		Body:       body,
		Normalized: normalized,
		BodyHash:   bodyHash,
	}
}
// findFunctionEnd returns the exclusive end index of the function that starts
// at lines[startLine]: lines[startLine:end] spans the declaration through the
// line holding its closing brace, and end equals the 1-based number of that
// line.
//
// Braces are counted textually, so braces inside string literals or comments
// are counted too. If startLine is out of range it is returned unchanged, and
// if the braces never balance the result is len(lines).
func (d *DuplicationDetector) findFunctionEnd(lines []string, startLine int) int {
	if startLine >= len(lines) {
		return startLine
	}

	depth := 0
	opened := false
	for i := startLine; i < len(lines); i++ {
		line := lines[i]
		opens := strings.Count(line, "{")
		// Closing braces reduce the nesting depth.
		depth += opens - strings.Count(line, "}")
		if opens > 0 {
			opened = true
		}
		// Only stop after the body was actually entered, so a declaration
		// whose opening brace comes on a later line is not cut short.
		if opened && depth <= 0 {
			return i + 1
		}
	}
	return len(lines)
}
// Patterns used by normalizeFunction, compiled once at package initialisation
// instead of on every call.
var (
	dupLineCommentPattern  = regexp.MustCompile(`//.*`)
	dupBlockCommentPattern = regexp.MustCompile(`/\*[\s\S]*?\*/`)
	dupWhitespacePattern   = regexp.MustCompile(`\s+`)
	dupIdentifierPattern   = regexp.MustCompile(`\b[a-z][a-zA-Z0-9]*\b`)
)

// normalizeFunction reduces a function body to a canonical form for
// comparison: comments are removed, every run of whitespace becomes a single
// space, and every word starting with a lowercase ASCII letter is replaced by
// the placeholder VAR. The replacement is purely textual, so keywords and
// "//" sequences inside string literals are affected as well.
func (d *DuplicationDetector) normalizeFunction(body string) string {
	// Remove comments
	body = dupLineCommentPattern.ReplaceAllString(body, "")
	body = dupBlockCommentPattern.ReplaceAllString(body, "")

	// Normalize whitespace
	body = dupWhitespacePattern.ReplaceAllString(body, " ")
	body = strings.TrimSpace(body)

	// Normalize variable names (simple approach)
	return dupIdentifierPattern.ReplaceAllString(body, "VAR")
}
// hashFunction returns the lowercase hexadecimal SHA-256 digest of the
// normalized function text.
func (d *DuplicationDetector) hashFunction(normalized string) string {
	sum := sha256.Sum256([]byte(normalized))
	return fmt.Sprintf("%x", sum[:])
}
// findDuplicates groups functions into duplicate clusters.
//
// Exact duplicates (identical BodyHash) are reported first, one cluster per
// hash with Similarity 1.0, in the order the hashes first appear in
// functions, so the result is stable from run to run. Near-duplicates are
// then gathered greedily: each function not yet placed seeds a cluster that
// collects every later unplaced function whose similarity to the seed reaches
// the threshold. A near-duplicate cluster is only emitted when it contains at
// least one function whose hash differs from the seed's; otherwise it would
// repeat an exact-duplicate cluster that was already reported.
//
// The Similarity of a near-duplicate cluster is the threshold, i.e. a lower
// bound rather than a measured value. The pairwise comparison is quadratic in
// the number of functions.
func (d *DuplicationDetector) findDuplicates(functions []quality.FunctionInfo) []DuplicateCluster {
	var clusters []DuplicateCluster

	// Group by exact hash, remembering the order in which hashes first appear
	// so clusters are not emitted in random map order.
	hashGroups := make(map[string][]quality.FunctionInfo)
	var hashOrder []string
	for _, fn := range functions {
		if _, seen := hashGroups[fn.BodyHash]; !seen {
			hashOrder = append(hashOrder, fn.BodyHash)
		}
		hashGroups[fn.BodyHash] = append(hashGroups[fn.BodyHash], fn)
	}

	// Create clusters from exact duplicates
	for _, hash := range hashOrder {
		group := hashGroups[hash]
		if len(group) < 2 {
			continue
		}
		clusters = append(clusters, DuplicateCluster{
			Functions:      group,
			Similarity:     1.0,
			Representative: group[0].Name,
		})
	}

	// Find near-duplicates using similarity
	processed := make([]bool, len(functions))
	for i := range functions {
		if processed[i] {
			continue
		}
		processed[i] = true

		similar := []quality.FunctionInfo{functions[i]}
		mixed := false // true once a member with a different hash joins
		// Everything before i has already been processed, so only later
		// functions can still join this cluster.
		for j := i + 1; j < len(functions); j++ {
			if processed[j] {
				continue
			}
			if d.calculateSimilarity(functions[i].Normalized, functions[j].Normalized) < d.similarityThreshold {
				continue
			}
			similar = append(similar, functions[j])
			processed[j] = true
			if functions[j].BodyHash != functions[i].BodyHash {
				mixed = true
			}
		}

		if len(similar) >= 2 && mixed {
			clusters = append(clusters, DuplicateCluster{
				Functions:      similar,
				Similarity:     d.similarityThreshold,
				Representative: similar[0].Name,
			})
		}
	}

	return clusters
}
// calculateSimilarity maps the Levenshtein distance between s1 and s2 onto
// [0, 1]: 1.0 for identical strings, otherwise one minus the distance divided
// by the byte length of the longer string.
func (d *DuplicationDetector) calculateSimilarity(s1, s2 string) float64 {
	if s1 == s2 {
		return 1.0
	}

	longest := max(len(s1), len(s2))
	if longest == 0 {
		return 1.0
	}

	edits := d.levenshteinDistance(s1, s2)
	return 1.0 - float64(edits)/float64(longest)
}
// levenshteinDistance returns the Levenshtein edit distance between s1 and
// s2, compared byte by byte. It keeps only two rows of the dynamic-programming
// table and reuses them for every iteration instead of allocating a new row
// each time.
func (d *DuplicationDetector) levenshteinDistance(s1, s2 string) int {
	// Keep s2 as the shorter string so the rows are as small as possible.
	if len(s1) < len(s2) {
		s1, s2 = s2, s1
	}
	m, n := len(s1), len(s2)
	if n == 0 {
		return m
	}

	prev := make([]int, n+1)
	curr := make([]int, n+1)
	for j := range prev {
		prev[j] = j
	}

	for i := 1; i <= m; i++ {
		curr[0] = i
		for j := 1; j <= n; j++ {
			cost := 0
			if s1[i-1] != s2[j-1] {
				cost = 1
			}
			curr[j] = min(
				prev[j]+1,      // deletion
				curr[j-1]+1,    // insertion
				prev[j-1]+cost, // substitution
			)
		}
		// The row just filled becomes the previous row; the old previous row
		// is overwritten on the next pass.
		prev, curr = curr, prev
	}

	return prev[n]
}
// formatFunctionList renders the functions as a comma-separated list of
// "name:line" entries for use in finding metadata.
func (d *DuplicationDetector) formatFunctionList(functions []quality.FunctionInfo) string {
	entries := make([]string, len(functions))
	for i := range functions {
		entries[i] = fmt.Sprintf("%s:%d", functions[i].Name, functions[i].Line)
	}
	return strings.Join(entries, ",")
}
// min returns the smallest of three integers.
func min(a, b, c int) int {
	smallest := a
	if b < smallest {
		smallest = b
	}
	if c < smallest {
		smallest = c
	}
	return smallest
}
// max returns the larger of two integers.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
+256
View File
@@ -0,0 +1,256 @@
package detectors
import (
	"context"
	"fmt"
	"path/filepath"
	"sort"
	"strings"

	"github.com/yourorg/devour/internal/quality"
)
// NamingConvention represents a file naming convention. The constants below
// are the values classifyConvention can return; the empty string means a
// name could not be classified.
type NamingConvention string
const (
ConventionKebabCase NamingConvention = "kebab-case"
ConventionPascalCase NamingConvention = "PascalCase"
ConventionCamelCase NamingConvention = "camelCase"
ConventionSnakeCase NamingConvention = "snake_case"
ConventionFlatLower NamingConvention = "flat_lower"
)
// NamingDetector detects naming inconsistencies between the files of a
// directory. It embeds *quality.BaseDetector for naming and file discovery.
type NamingDetector struct {
*quality.BaseDetector
skipNames map[string]bool // file names ignored by analyzeDirectory
skipDirs map[string]bool // directory base names whose files are ignored by analyzeDirectory
}
// NamingAnalysis represents naming analysis for a directory, as produced by
// analyzeDirectory. Conventions counts the classified files per convention
// and TotalFiles is the number of classified files. The Minority fields hold
// the result of findMinorityConvention and are zero when no minority
// convention qualified for reporting.
type NamingAnalysis struct {
Directory string `json:"directory"`
Conventions map[NamingConvention]int `json:"conventions"`
TotalFiles int `json:"total_files"`
Minority NamingConvention `json:"minority"`
MinorityCount int `json:"minority_count"`
MinorityPercent float64 `json:"minority_percent"`
}
// NewNamingDetector builds a naming detector that discovers files through
// finder. It is preloaded with the well-known file names and directory names
// that are left out of the naming analysis.
func NewNamingDetector(finder quality.FileFinder) *NamingDetector {
	detector := &NamingDetector{
		BaseDetector: quality.NewBaseDetector("naming", quality.SeverityT2, finder),
		skipNames:    make(map[string]bool),
		skipDirs:     make(map[string]bool),
	}

	for _, name := range []string{"README.md", "LICENSE", "Makefile", "Dockerfile", "go.mod", "go.sum"} {
		detector.skipNames[name] = true
	}
	for _, dir := range []string{".git", "node_modules", "vendor", ".vscode", ".idea"} {
		detector.skipDirs[dir] = true
	}

	return detector
}
// Name reports the fixed identifier of this detector, "naming".
func (d *NamingDetector) Name() string { return "naming" }
// Severity reports the fixed default severity of this detector, SeverityT2.
func (d *NamingDetector) Severity() quality.Severity { return quality.SeverityT2 }
// Detect runs naming inconsistency detection on the files found under path
// for config.Language. Files excluded by config.Exclude are ignored, the rest
// are grouped by directory, and each directory whose analysis passes
// shouldReport yields one finding. Directories are visited in sorted order so
// the findings come out in the same order on every run.
func (d *NamingDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	files, err := d.FindFiles(path, config.Language)
	if err != nil {
		return nil, fmt.Errorf("failed to find files: %w", err)
	}

	// Group files by directory
	dirFiles := make(map[string][]string)
	for _, file := range files {
		if quality.ShouldExclude(file, config.Exclude) {
			continue
		}
		dir := filepath.Dir(file)
		dirFiles[dir] = append(dirFiles[dir], file)
	}

	// Map iteration order is random; sort the keys for stable output.
	dirs := make([]string, 0, len(dirFiles))
	for dir := range dirFiles {
		dirs = append(dirs, dir)
	}
	sort.Strings(dirs)

	// Analyze each directory
	var findings []quality.Finding
	for _, dir := range dirs {
		analysis := d.analyzeDirectory(dir, dirFiles[dir])
		if d.shouldReport(analysis) {
			findings = append(findings, d.createFinding(analysis))
		}
	}

	return findings, nil
}
// analyzeDirectory tallies the naming conventions of the given files of dir.
// Files listed in skipNames, files that cannot be classified, and all files
// of a directory listed in skipDirs are left out of the tally. The minority
// convention is then derived from the tally by findMinorityConvention.
func (d *NamingDetector) analyzeDirectory(dir string, files []string) NamingAnalysis {
	counts := make(map[NamingConvention]int)
	classified := 0

	// Whether the directory is skipped depends only on dir, so it is decided
	// once for all of its files.
	if !d.skipDirs[filepath.Base(dir)] {
		for _, file := range files {
			name := filepath.Base(file)
			if d.skipNames[name] {
				continue
			}
			if conv := d.classifyConvention(name); conv != "" {
				counts[conv]++
				classified++
			}
		}
	}

	minority, minorityCount, minorityPercent := d.findMinorityConvention(counts, classified)

	return NamingAnalysis{
		Directory:       dir,
		Conventions:     counts,
		TotalFiles:      classified,
		Minority:        minority,
		MinorityCount:   minorityCount,
		MinorityPercent: minorityPercent,
	}
}
// classifyConvention classifies a file name into a naming convention based on
// its stem, i.e. the name without its last extension. It returns "" when the
// stem is empty (for example for dotfiles such as ".gitignore") or fits no
// convention.
//
// PascalCase and camelCase require the first character to be an ASCII letter
// of the matching case. Names that start with a digit, an underscore or a
// non-ASCII byte therefore fall through to the snake_case and flat_lower
// checks instead of being reported as PascalCase.
func (d *NamingDetector) classifyConvention(filename string) NamingConvention {
	// Remove extension
	stem := filename
	if idx := strings.LastIndex(filename, "."); idx != -1 {
		stem = filename[:idx]
	}
	if stem == "" {
		return ""
	}

	first := stem[0]
	startsUpper := first >= 'A' && first <= 'Z'
	startsLower := first >= 'a' && first <= 'z'
	hasDash := strings.Contains(stem, "-")
	allLower := stem == strings.ToLower(stem)

	// The order matters: earlier cases win when several could apply.
	switch {
	case hasDash && allLower:
		return ConventionKebabCase
	case startsUpper && !hasDash:
		return ConventionPascalCase
	case startsLower && !hasDash && d.hasUpper(stem):
		return ConventionCamelCase
	case allLower && strings.Contains(stem, "_"):
		return ConventionSnakeCase
	case allLower && !hasDash:
		return ConventionFlatLower
	}
	return ""
}
// hasUpper reports whether s contains at least one ASCII uppercase letter.
func (d *NamingDetector) hasUpper(s string) bool {
	isASCIIUpper := func(r rune) bool { return 'A' <= r && r <= 'Z' }
	return strings.IndexFunc(s, isASCIIUpper) >= 0
}
// findMinorityConvention returns the least used convention together with its
// file count and its share of totalFiles in percent.
//
// A result is only reported when at least two conventions are present, the
// minority covers at least 5 files and at least 15% of totalFiles, and its
// count is below totalFiles; otherwise ("", 0, 0) is returned. When several
// conventions share the smallest count, the one whose name sorts first is
// chosen, so the result does not depend on map iteration order.
func (d *NamingDetector) findMinorityConvention(conventions map[NamingConvention]int, totalFiles int) (NamingConvention, int, float64) {
	if len(conventions) < 2 || totalFiles <= 0 {
		return "", 0, 0
	}

	var minority NamingConvention
	minorityCount := 0
	found := false
	for convention, count := range conventions {
		if !found || count < minorityCount || (count == minorityCount && convention < minority) {
			minority, minorityCount, found = convention, count, true
		}
	}
	// A "minority" that accounts for every file is not a minority.
	if minorityCount >= totalFiles {
		return "", 0, 0
	}

	// Check thresholds
	minorityPercent := float64(minorityCount) / float64(totalFiles) * 100

	// Only report if minority has >= 5 files and >= 15% of total
	if minorityCount >= 5 && minorityPercent >= 15 {
		return minority, minorityCount, minorityPercent
	}

	return "", 0, 0
}
// shouldReport decides whether an analysis warrants a finding: a minority
// convention must be set and must cover at least 5 files and at least 15% of
// the analyzed files.
func (d *NamingDetector) shouldReport(analysis NamingAnalysis) bool {
	if analysis.Minority == "" {
		return false
	}
	if analysis.MinorityCount < 5 {
		return false
	}
	return analysis.MinorityPercent >= 15
}
// createFinding turns a directory analysis into a finding.
//
// The list of conventions is sorted before it is rendered, so Description and
// the "conventions" metadata are identical from run to run rather than
// following random map order. The ID is derived from the directory path with
// path separators replaced by dashes; the path is converted to forward
// slashes first so the same rule applies on platforms with a different
// separator.
func (d *NamingDetector) createFinding(analysis NamingAnalysis) quality.Finding {
	conventionList := make([]string, 0, len(analysis.Conventions))
	for conv, count := range analysis.Conventions {
		conventionList = append(conventionList, fmt.Sprintf("%s (%d)", conv, count))
	}
	sort.Strings(conventionList)

	return quality.Finding{
		ID:    fmt.Sprintf("naming-%s", strings.ReplaceAll(filepath.ToSlash(analysis.Directory), "/", "-")),
		Type:  "naming",
		Title: "Naming inconsistency detected",
		Description: fmt.Sprintf("Directory '%s' has mixed naming conventions. Minority: %s with %d files (%.1f%%). All conventions: %s",
			analysis.Directory, analysis.Minority, analysis.MinorityCount, analysis.MinorityPercent, strings.Join(conventionList, ", ")),
		File:     analysis.Directory,
		Line:     1,
		Severity: d.Severity(),
		Score:    int(analysis.MinorityPercent), // Score based on percentage
		Status:   quality.StatusOpen,
		Metadata: map[string]string{
			"directory":        analysis.Directory,
			"minority":         string(analysis.Minority),
			"minority_count":   fmt.Sprintf("%d", analysis.MinorityCount),
			"minority_percent": fmt.Sprintf("%.1f", analysis.MinorityPercent),
			"total_files":      fmt.Sprintf("%d", analysis.TotalFiles),
			"conventions":      strings.Join(conventionList, ";"),
		},
	}
}
+301
View File
@@ -0,0 +1,301 @@
package quality
import (
"time"
)
// Dimension represents a quality dimension category. It is the key type of
// EnhancedStatus.DimensionScores, so the string values below appear as JSON
// object keys when a status is serialized.
// NOTE(review): the values mix sentence case ("File health") and title case
// ("Naming Quality") — confirm whether that is intentional before relying on
// them as stable keys.
type Dimension string
const (
DimensionFileHealth Dimension = "File health"
DimensionCodeQuality Dimension = "Code quality"
DimensionDuplication Dimension = "Duplication"
DimensionTestHealth Dimension = "Test health"
DimensionSecurity Dimension = "Security"
DimensionNamingQuality Dimension = "Naming Quality"
DimensionErrorConsistency Dimension = "Error Consistency"
DimensionAbstractionFit Dimension = "Abstraction Fit"
DimensionLogicClarity Dimension = "Logic Clarity"
DimensionAIGeneratedDebt Dimension = "AI Generated Debt"
DimensionTypeSafety Dimension = "Type Safety"
DimensionContractCoherence Dimension = "Contract Coherence"
DimensionElegance Dimension = "Elegance"
DimensionContracts Dimension = "Contracts"
)
// DetectorMetrics represents metrics for a specific detector. It is the
// value type of DimensionScore.Detectors.
type DetectorMetrics struct {
Potential int `json:"potential"`
PassRate float64 `json:"pass_rate"`
Issues int `json:"issues"`
WeightedFailures float64 `json:"weighted_failures"`
}
// DimensionScore represents the score for a quality dimension. It is the
// value type of EnhancedStatus.DimensionScores.
type DimensionScore struct {
Score float64 `json:"score"`
Strict float64 `json:"strict"`
Checks int `json:"checks"`
Issues int `json:"issues"`
Tier int `json:"tier"`
// Detectors holds per-detector metrics, keyed by string (presumably the
// detector name — confirm against the code that fills it).
Detectors map[string]*DetectorMetrics `json:"detectors"`
}
// ScanStats represents scanning statistics: a total, one counter per finding
// status, and the same status counters broken down per tier in ByTier.
type ScanStats struct {
Total int `json:"total"`
Open int `json:"open"`
Fixed int `json:"fixed"`
AutoResolved int `json:"auto_resolved"`
Wontfix int `json:"wontfix"`
FalsePositive int `json:"false_positive"`
ByTier map[string]*TierStats `json:"by_tier"`
}
// TierStats represents statistics for a severity tier, one counter per
// finding status. It is the value type of ScanStats.ByTier and
// EnhancedStatus.ByTier.
type TierStats struct {
Open int `json:"open"`
Fixed int `json:"fixed"`
AutoResolved int `json:"auto_resolved"`
Wontfix int `json:"wontfix"`
FalsePositive int `json:"false_positive"`
}
// DetectorTransparency represents transparency information for detectors:
// one row per detector plus the totals.
type DetectorTransparency struct {
Rows []DetectorRow `json:"rows"`
Totals DetectorTotals `json:"totals"`
}
// DetectorRow represents a single detector's transparency data. It is the
// element type of DetectorTransparency.Rows.
type DetectorRow struct {
Detector string `json:"detector"`
Visible int `json:"visible"`
Suppressed int `json:"suppressed"`
Excluded int `json:"excluded"`
TotalDetected int `json:"total_detected"`
}
// DetectorTotals represents totals for detector transparency. It is the type
// of DetectorTransparency.Totals.
type DetectorTotals struct {
Visible int `json:"visible"`
Suppressed int `json:"suppressed"`
Excluded int `json:"excluded"`
Detectors int `json:"detectors"`
}
// Potentials represents potential scores by language. Languages is keyed by
// string (presumably the language name — confirm against the code that
// fills it).
type Potentials struct {
Languages map[string]*LanguagePotentials `json:"languages"`
}
// LanguagePotentials represents potential scores for a language, one integer
// per category. It is the value type of Potentials.Languages.
type LanguagePotentials struct {
Logs int `json:"logs"`
Unused int `json:"unused"`
Exports int `json:"exports"`
Deprecated int `json:"deprecated"`
Structural int `json:"structural"`
FlatDirs int `json:"flat_dirs"`
Props int `json:"props"`
SingleUse int `json:"single_use"`
Coupling int `json:"coupling"`
Cycles int `json:"cycles"`
Orphaned int `json:"orphaned"`
Patterns int `json:"patterns"`
Naming int `json:"naming"`
Facade int `json:"facade"`
TestCoverage int `json:"test_coverage"`
Smells int `json:"smells"`
React int `json:"react"`
Security int `json:"security"`
SubjectiveReview int `json:"subjective_review"`
Dupes int `json:"dupes"`
}
// CodebaseMetrics represents metrics about the codebase, broken down per
// language. Languages is keyed by string (presumably the language name —
// confirm against the code that fills it).
type CodebaseMetrics struct {
Languages map[string]*LanguageMetrics `json:"languages"`
}
// LanguageMetrics represents metrics for a specific language: file, line and
// directory totals. It is the value type of CodebaseMetrics.Languages.
type LanguageMetrics struct {
TotalFiles int `json:"total_files"`
TotalLOC int `json:"total_loc"`
TotalDirectories int `json:"total_directories"`
}
// StrictTarget represents the target scoring information. It is referenced
// by both Narrative.StrictTarget and EnhancedStatus.StrictTarget.
type StrictTarget struct {
Target float64 `json:"target"`
Current float64 `json:"current"`
Gap float64 `json:"gap"`
State string `json:"state"`
Warning *string `json:"warning"` // a nil pointer is encoded as JSON null
}
// Narrative represents the analysis narrative. NarrativeGenerator.Generate
// fills every field except PrimaryAction, VerificationStep and
// ReminderHistory, which it leaves nil.
type Narrative struct {
Phase string `json:"phase"`
Headline string `json:"headline"`
Dimensions *NarrativeDimensions `json:"dimensions"`
Actions []string `json:"actions"`
Strategy *NarrativeStrategy `json:"strategy"`
Tools *NarrativeTools `json:"tools"`
Debt *NarrativeDebt `json:"debt"`
Milestone string `json:"milestone"`
PrimaryAction *string `json:"primary_action"`
WhyNow string `json:"why_now"`
VerificationStep *string `json:"verification_step"`
RiskFlags []string `json:"risk_flags"`
StrictTarget *StrictTarget `json:"strict_target"`
Reminders []string `json:"reminders"`
ReminderHistory *ReminderHistory `json:"reminder_history"`
}
// NarrativeDimensions represents dimension analysis in narrative: three
// lists of dimension summaries. It is the type of Narrative.Dimensions.
type NarrativeDimensions struct {
LowestDimensions []*DimensionInfo `json:"lowest_dimensions"`
BiggestGapDimensions []*DimensionInfo `json:"biggest_gap_dimensions"`
StagnantDimensions []*DimensionInfo `json:"stagnant_dimensions"`
}
// DimensionInfo represents information about a dimension. It is the element
// type of the lists in NarrativeDimensions. The pointer fields are optional:
// they carry omitempty and are left out of the JSON output when nil.
type DimensionInfo struct {
Name string `json:"name"`
Strict float64 `json:"strict"`
Issues int `json:"issues"`
Impact float64 `json:"impact"`
Subjective bool `json:"subjective"`
ImpactDescription string `json:"impact_description"`
StuckScans *int `json:"stuck_scans,omitempty"`
Lenient *float64 `json:"lenient,omitempty"`
Gap *float64 `json:"gap,omitempty"`
WontfixCount *int `json:"wontfix_count,omitempty"`
}
// NarrativeStrategy represents strategy information. It is the type of
// Narrative.Strategy. Lanes is an untyped map; its schema is not defined in
// this file.
type NarrativeStrategy struct {
FixerLeverage *FixerLeverage `json:"fixer_leverage"`
Lanes map[string]interface{} `json:"lanes"`
CanParallelize bool `json:"can_parallelize"`
Hint string `json:"hint"`
}
// FixerLeverage represents fixer leverage information. It is the type of
// NarrativeStrategy.FixerLeverage.
type FixerLeverage struct {
AutoFixableCount int `json:"auto_fixable_count"`
TotalCount int `json:"total_count"`
Coverage float64 `json:"coverage"`
ImpactRatio float64 `json:"impact_ratio"`
Recommendation string `json:"recommendation"`
}
// NarrativeTools represents available tools. It is the type of
// Narrative.Tools. Fixers is an untyped list; its element schema is not
// defined in this file.
type NarrativeTools struct {
Fixers []interface{} `json:"fixers"`
Move *MoveTool `json:"move"`
Plan *PlanTool `json:"plan"`
Badge *BadgeTool `json:"badge"`
}
// MoveTool represents the move tool entry of NarrativeTools.
type MoveTool struct {
Available bool `json:"available"`
Relevant bool `json:"relevant"`
Reason *string `json:"reason"` // a nil pointer is encoded as JSON null
Usage string `json:"usage"`
}
// PlanTool represents the plan tool entry of NarrativeTools.
type PlanTool struct {
Command string `json:"command"`
Description string `json:"description"`
}
// BadgeTool represents the badge tool entry of NarrativeTools.
type BadgeTool struct {
Generated bool `json:"generated"`
InReadme bool `json:"in_readme"`
Path string `json:"path"`
Recommendation *string `json:"recommendation"` // a nil pointer is encoded as JSON null
}
// NarrativeDebt represents debt analysis. It is the type of Narrative.Debt.
type NarrativeDebt struct {
OverallGap float64 `json:"overall_gap"`
WontfixCount int `json:"wontfix_count"`
WorstDimension string `json:"worst_dimension"`
WorstGap float64 `json:"worst_gap"`
Trend string `json:"trend"`
}
// ReminderHistory represents reminder history: one integer per reminder
// kind. It is the type of Narrative.ReminderHistory.
// NOTE(review): the integers are presumably counts of how often each
// reminder was shown — confirm against the code that updates them.
type ReminderHistory struct {
ReportScores int `json:"report_scores"`
AutoFixersAvailable int `json:"auto_fixers_available"`
DryRunFirst int `json:"dry_run_first"`
ZoneClassification int `json:"zone_classification"`
FPCalibrationExportsProduction int `json:"fp_calibration_exports_production"`
FeedbackNudge int `json:"feedback_nudge"`
WontfixGrowing int `json:"wontfix_growing"`
StagnantNudge int `json:"stagnant_nudge"`
ReviewNotRun int `json:"review_not_run"`
BadgeRecommendation int `json:"badge_recommendation"`
}
// QualityConfig represents enhanced quality configuration. It is the type of
// EnhancedStatus.Config. IgnoreMetadata, ZoneOverrides and Languages are
// untyped maps; their schemas are not defined in this file.
type QualityConfig struct {
ReviewMaxAgeDays int `json:"review_max_age_days"`
HolisticMaxAgeDays int `json:"holistic_max_age_days"`
GenerateScorecard bool `json:"generate_scorecard"`
BadgePath string `json:"badge_path"`
Exclude []string `json:"exclude"`
Ignore []string `json:"ignore"`
IgnoreMetadata map[string]interface{} `json:"ignore_metadata"`
ZoneOverrides map[string]interface{} `json:"zone_overrides"`
ReviewDimensions []string `json:"review_dimensions"`
ReviewAllowCustomDimensions bool `json:"review_allow_custom_dimensions"`
ReviewCustomDimensions []string `json:"review_custom_dimensions"`
LargeFilesThreshold int `json:"large_files_threshold"`
PropsThreshold int `json:"props_threshold"`
FindingNoiseBudget int `json:"finding_noise_budget"`
FindingNoiseGlobalBudget int `json:"finding_noise_global_budget"`
TargetStrictScore int `json:"target_strict_score"`
Languages map[string]interface{} `json:"languages"`
}
// EnhancedStatus represents the comprehensive status response. It bundles
// the scores, statistics, transparency data, narrative and configuration
// into one JSON document.
// NOTE(review): ByTier has the same type as Stats.ByTier; whether the two
// are meant to carry the same data is not visible here — confirm.
type EnhancedStatus struct {
Command string `json:"command"`
OverallScore float64 `json:"overall_score"`
ObjectiveScore float64 `json:"objective_score"`
StrictScore float64 `json:"strict_score"`
StrictAllDetected float64 `json:"strict_all_detected"`
DimensionScores map[Dimension]*DimensionScore `json:"dimension_scores"`
Stats *ScanStats `json:"stats"`
ScanCount int `json:"scan_count"`
LastScan time.Time `json:"last_scan"`
ByTier map[string]*TierStats `json:"by_tier"`
Ignores []string `json:"ignores"`
Suppression *SuppressionInfo `json:"suppression"`
DetectorTransparency *DetectorTransparency `json:"detector_transparency"`
Potentials *Potentials `json:"potentials"`
CodebaseMetrics *CodebaseMetrics `json:"codebase_metrics"`
StrictTarget *StrictTarget `json:"strict_target"`
Narrative *Narrative `json:"narrative"`
Config *QualityConfig `json:"config"`
}
// SuppressionInfo represents suppression information. The Last* fields and
// the Recent* fields form two parallel groups of counters. It is the type of
// EnhancedStatus.Suppression.
type SuppressionInfo struct {
LastIgnored int `json:"last_ignored"`
LastRawFindings int `json:"last_raw_findings"`
LastSuppressedPct float64 `json:"last_suppressed_pct"`
LastIgnorePatterns int `json:"last_ignore_patterns"`
RecentScans int `json:"recent_scans"`
RecentIgnored int `json:"recent_ignored"`
RecentRawFindings int `json:"recent_raw_findings"`
RecentSuppressedPct float64 `json:"recent_suppressed_pct"`
}
+176
View File
@@ -0,0 +1,176 @@
package quality
import (
"os"
"path/filepath"
"strings"
)
// LanguageConfig represents configuration for a programming language.
type LanguageConfig struct {
Name string `json:"name"` // identifier compared against the language argument of FindFiles and IsSourceFile
Extensions []string `json:"extensions"` // file extensions including the leading dot, compared with filepath.Ext
MarkerFiles []string `json:"marker_files"` // names or glob patterns that DetectLanguage looks for in the project directory
DefaultSrc string `json:"default_src"`
}
// GetSupportedLanguages returns all supported languages.
//
// The order is significant: DetectLanguage returns the first language one of
// whose marker files is present. TypeScript is therefore identified only by
// tsconfig.json. Listing package.json for it as well would make every
// JavaScript project resolve to "typescript", because TypeScript is checked
// before JavaScript.
func GetSupportedLanguages() []LanguageConfig {
	return []LanguageConfig{
		{
			Name:        "go",
			Extensions:  []string{".go"},
			MarkerFiles: []string{"go.mod", "go.sum"},
			DefaultSrc:  ".",
		},
		{
			Name:        "typescript",
			Extensions:  []string{".ts", ".tsx"},
			MarkerFiles: []string{"tsconfig.json"},
			DefaultSrc:  "src",
		},
		{
			Name:        "python",
			Extensions:  []string{".py"},
			MarkerFiles: []string{"requirements.txt", "setup.py", "pyproject.toml"},
			DefaultSrc:  ".",
		},
		{
			Name:        "java",
			Extensions:  []string{".java"},
			MarkerFiles: []string{"pom.xml", "build.gradle"},
			DefaultSrc:  "src/main/java",
		},
		{
			Name:        "rust",
			Extensions:  []string{".rs"},
			MarkerFiles: []string{"Cargo.toml"},
			DefaultSrc:  "src",
		},
		{
			Name:        "javascript",
			Extensions:  []string{".js", ".jsx"},
			MarkerFiles: []string{"package.json"},
			DefaultSrc:  "src",
		},
		{
			Name:        "csharp",
			Extensions:  []string{".cs"},
			MarkerFiles: []string{"*.csproj", "*.sln"},
			DefaultSrc:  ".",
		},
		{
			Name:        "dart",
			Extensions:  []string{".dart"},
			MarkerFiles: []string{"pubspec.yaml"},
			DefaultSrc:  "lib",
		},
	}
}
// DefaultFileFinder implements the FileFinder interface by walking the file
// system and matching file extensions against the supported languages. It
// has no fields.
type DefaultFileFinder struct{}
// NewDefaultFileFinder returns a ready-to-use DefaultFileFinder.
func NewDefaultFileFinder() *DefaultFileFinder {
	return new(DefaultFileFinder)
}
// FindFiles returns the files below path whose extension belongs to the
// given language. Unknown languages fall back to the Go extension.
//
// Hidden directories (base name starting with "."), node_modules and vendor
// are not descended into. The rule is not applied to path itself: the caller
// asked for that directory explicitly, and its base name is "." when the
// current directory is scanned, which would otherwise prune the entire walk.
//
// If the walk reports an error it is returned together with the files
// collected up to that point.
func (f *DefaultFileFinder) FindFiles(path string, language string) ([]string, error) {
	// Default to Go extensions if the language is not found.
	extensions := []string{".go"}
	for _, lang := range GetSupportedLanguages() {
		if lang.Name == language {
			if len(lang.Extensions) > 0 {
				extensions = lang.Extensions
			}
			break
		}
	}

	var files []string
	err := filepath.Walk(path, func(filePath string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		if info.IsDir() {
			// Walk reports the root exactly as it was passed in.
			if filePath == path {
				return nil
			}
			// Skip hidden directories and common exclude dirs
			base := filepath.Base(filePath)
			if strings.HasPrefix(base, ".") || base == "node_modules" || base == "vendor" {
				return filepath.SkipDir
			}
			return nil
		}

		// Check file extension
		ext := filepath.Ext(filePath)
		for _, langExt := range extensions {
			if ext == langExt {
				files = append(files, filePath)
				break
			}
		}
		return nil
	})

	return files, err
}
// IsSourceFile reports whether the extension of path is one of the
// extensions registered for language. Unknown languages are checked against
// the Go extension.
func (f *DefaultFileFinder) IsSourceFile(path string, language string) bool {
	// Start from the Go fallback and replace it when the language is known.
	candidates := []string{".go"}
	for _, cfg := range GetSupportedLanguages() {
		if cfg.Name != language {
			continue
		}
		if len(cfg.Extensions) > 0 {
			candidates = cfg.Extensions
		}
		break
	}

	actual := filepath.Ext(path)
	for _, candidate := range candidates {
		if candidate == actual {
			return true
		}
	}
	return false
}
// DetectLanguage guesses the project language by looking for marker files
// directly inside path. Languages are tried in the order returned by
// GetSupportedLanguages and the first language with a matching marker wins.
// "go" is returned when no marker is found.
func DetectLanguage(path string) string {
	for _, lang := range GetSupportedLanguages() {
		for _, marker := range lang.MarkerFiles {
			// Markers may be glob patterns (e.g. "*.csproj"), hence Glob
			// rather than a plain stat. One call is enough: a malformed
			// pattern is simply treated as "no match".
			matches, err := filepath.Glob(filepath.Join(path, marker))
			if err != nil {
				continue
			}
			if len(matches) > 0 {
				return lang.Name
			}
		}
	}

	// Default to Go if no markers were found.
	return "go"
}
+438
View File
@@ -0,0 +1,438 @@
package quality
import (
"fmt"
"sort"
)
// NarrativeGenerator builds a Narrative from findings, a scorecard and the
// scan history.
type NarrativeGenerator struct {
	// targetScore is the strict score the project is aiming for.
	targetScore int
}

// NewNarrativeGenerator returns a generator aiming at targetScore. Zero or
// negative targets are replaced by the default of 95.
func NewNarrativeGenerator(targetScore int) *NarrativeGenerator {
	const defaultTarget = 95

	gen := &NarrativeGenerator{targetScore: defaultTarget}
	if targetScore > 0 {
		gen.targetScore = targetScore
	}
	return gen
}
// Generate assembles the complete Narrative for one scan. The phase is
// derived first because the headline, actions, milestone and "why now" text
// all depend on it; every other section is computed independently from the
// findings, the scorecard and the history.
func (g *NarrativeGenerator) Generate(findings []Finding, scorecard *Scorecard, history []StateSnapshot) *Narrative {
	phase := g.determinePhase(findings, scorecard)
	dims := g.analyzeDimensions(findings)

	out := &Narrative{Phase: phase, Dimensions: dims}
	out.Headline = g.generateHeadline(phase, scorecard)
	out.Actions = g.generateActions(findings, phase)
	out.Strategy = g.generateStrategy(findings, dims)
	out.Tools = g.generateTools(findings)
	out.Debt = g.analyzeDebt(findings, scorecard)
	out.StrictTarget = g.calculateStrictTarget(scorecard)
	out.Reminders = g.generateReminders(findings, history)
	out.RiskFlags = g.identifyRisks(findings, history)
	out.Milestone = g.generateMilestone(phase, scorecard)
	out.WhyNow = g.explainWhyNow(phase, findings)
	return out
}
// determinePhase classifies the project into one of five phases based on
// the open findings only:
//
//	"maintenance"    - nothing is open
//	"critical"       - at least one open T4 finding
//	"debt_reduction" - more than 5 open T3 findings, or more than 20 open in total
//	"cleanup"        - more than 5 open findings
//	"polish"         - everything else
//
// The scorecard argument is currently not consulted.
func (g *NarrativeGenerator) determinePhase(findings []Finding, scorecard *Scorecard) string {
	var open, t3, t4 int
	for i := range findings {
		if findings[i].Status != StatusOpen {
			continue
		}
		open++
		switch findings[i].Severity {
		case SeverityT4:
			t4++
		case SeverityT3:
			t3++
		}
	}

	switch {
	case open == 0:
		return "maintenance"
	case t4 > 0:
		return "critical"
	case t3 > 5 || open > 20:
		return "debt_reduction"
	case open > 5:
		return "cleanup"
	default:
		return "polish"
	}
}
// generateHeadline returns the one-line summary for the given phase. The
// "maintenance" headline is fixed text; all other phases interpolate a
// number taken from scorecard, which must therefore be non-nil for them.
//
// NOTE(review): the "debt_reduction", "cleanup" and default messages are
// worded as an open-issue count but interpolate scorecard.TotalScore -
// confirm which value is intended.
func (g *NarrativeGenerator) generateHeadline(phase string, scorecard *Scorecard) string {
	if phase == "maintenance" {
		return "Codebase is healthy! Focus on preventing new debt."
	}
	if phase == "critical" {
		return fmt.Sprintf("Critical issues detected (%d strict score). Address T4 findings first.", scorecard.StrictScore)
	}
	if phase == "debt_reduction" {
		return fmt.Sprintf("Significant technical debt (%d open issues). Systematic cleanup recommended.", scorecard.TotalScore)
	}
	if phase == "cleanup" {
		return fmt.Sprintf("Minor issues detected (%d open). Quick wins available.", scorecard.TotalScore)
	}
	return fmt.Sprintf("Codebase in good shape (%d open issues).", scorecard.TotalScore)
}
// analyzeDimensions groups the open findings by quality dimension and
// returns the (at most five) dimensions with the highest impact, where a
// dimension's impact is the sum of Score*Severity over its open findings.
//
// Dimensions are collected from a map, whose iteration order is random, and
// sort.Slice is not stable. To keep the result (and the top-five cut)
// reproducible between runs, ties on impact are broken by dimension name.
//
// BiggestGapDimensions and StagnantDimensions are not computed here and are
// always returned as nil slices.
func (g *NarrativeGenerator) analyzeDimensions(findings []Finding) *NarrativeDimensions {
	byDimension := make(map[Dimension][]Finding)
	for _, f := range findings {
		if f.Status == StatusOpen {
			dim := g.classifyDimension(f)
			byDimension[dim] = append(byDimension[dim], f)
		}
	}

	var lowest []*DimensionInfo
	var biggestGap []*DimensionInfo
	var stagnant []*DimensionInfo

	for dim, dimFindings := range byDimension {
		impact := 0
		for _, f := range dimFindings {
			impact += f.Score * int(f.Severity)
		}
		info := &DimensionInfo{
			Name:   string(dim),
			Issues: len(dimFindings),
		}
		info.Impact = float64(impact)
		lowest = append(lowest, info)
	}

	// Highest impact first; equal impacts are ordered by name so the output
	// does not depend on map iteration order.
	sort.Slice(lowest, func(i, j int) bool {
		if lowest[i].Impact != lowest[j].Impact {
			return lowest[i].Impact > lowest[j].Impact
		}
		return lowest[i].Name < lowest[j].Name
	})

	if len(lowest) > 5 {
		lowest = lowest[:5]
	}

	return &NarrativeDimensions{
		LowestDimensions:     lowest,
		BiggestGapDimensions: biggestGap,
		StagnantDimensions:   stagnant,
	}
}
// classifyDimension maps a finding's Type to the quality dimension it counts
// against. Types without a dedicated mapping are attributed to
// DimensionCodeQuality.
func (g *NarrativeGenerator) classifyDimension(f Finding) Dimension {
	// Code quality doubles as the catch-all, so start from it and override
	// only for the types that belong elsewhere.
	dim := DimensionCodeQuality
	switch f.Type {
	case "duplication", "dupes":
		dim = DimensionDuplication
	case "dead_code", "unused_import", "unused":
		dim = DimensionFileHealth
	case "security":
		dim = DimensionSecurity
	case "naming":
		dim = DimensionNamingQuality
	case "import_cycle", "cycles":
		dim = DimensionAbstractionFit
	}
	return dim
}
// generateActions turns the open findings into a short to-do list, one
// entry per severity tier that has at least one open finding. Entries are
// emitted in the fixed order T4, T3, T1, T2. When nothing is open a single
// "no action required" entry is returned. The phase argument is currently
// not consulted.
func (g *NarrativeGenerator) generateActions(findings []Finding, phase string) []string {
	var autoFix, quick, judgment, major int
	for i := range findings {
		f := &findings[i]
		if f.Status != StatusOpen {
			continue
		}
		if f.Severity == SeverityT1 {
			autoFix++
		} else if f.Severity == SeverityT2 {
			quick++
		} else if f.Severity == SeverityT3 {
			judgment++
		} else if f.Severity == SeverityT4 {
			major++
		}
	}

	// The table order is the order in which actions are presented.
	steps := []struct {
		count  int
		format string
	}{
		{major, "Address %d T4 (major refactor) issues - these require architectural changes"},
		{judgment, "Review %d T3 (needs judgment) issues - decide if they need fixing"},
		{autoFix, "Run auto-fixer for %d T1 (auto-fixable) issues"},
		{quick, "Quick manual fixes available for %d T2 issues"},
	}

	var actions []string
	for _, step := range steps {
		if step.count > 0 {
			actions = append(actions, fmt.Sprintf(step.format, step.count))
		}
	}
	if len(actions) == 0 {
		actions = append(actions, "No immediate actions required - maintain code quality")
	}
	return actions
}
// generateStrategy summarises how much of the open work can be handled by
// auto-fixers (open T1 findings as a percentage of all open findings) and
// picks a recommendation accordingly: above 50% coverage, some coverage, or
// none. The dimensions argument is currently not consulted.
//
// NOTE(review): CanParallelize is derived from len(findings), i.e. it also
// counts findings that are not open - confirm that this is intended.
func (g *NarrativeGenerator) generateStrategy(findings []Finding, dimensions *NarrativeDimensions) *NarrativeStrategy {
	var open, fixable int
	for i := range findings {
		if findings[i].Status != StatusOpen {
			continue
		}
		open++
		if findings[i].Severity == SeverityT1 {
			fixable++
		}
	}

	// Coverage stays at zero when nothing is open, avoiding a 0/0 division.
	var coverage float64
	if open > 0 {
		coverage = float64(fixable) / float64(open) * 100
	}

	var advice string
	switch {
	case coverage > 50:
		advice = "Use auto-fixers first, then address remaining issues manually"
	case fixable > 0:
		advice = "Start with auto-fixers for quick wins, then prioritize by impact"
	default:
		advice = "Prioritize by severity and impact, starting with T4 issues"
	}

	leverage := &FixerLeverage{
		AutoFixableCount: fixable,
		TotalCount:       open,
		Coverage:         coverage,
		Recommendation:   advice,
	}
	return &NarrativeStrategy{
		FixerLeverage:  leverage,
		CanParallelize: len(findings) > 3,
		Hint:           g.generateHint(findings),
	}
}
// generateHint returns a single tip for the user. An open T1 finding takes
// precedence (auto-fix hint), then an open T4 finding (planning hint);
// otherwise a generic tip is returned.
func (g *NarrativeGenerator) generateHint(findings []Finding) string {
	sawMajor := false
	for i := range findings {
		if findings[i].Status != StatusOpen {
			continue
		}
		switch findings[i].Severity {
		case SeverityT1:
			// T1 wins regardless of where it appears in the list.
			return "T1 issues can be auto-fixed with 'devour quality fix'"
		case SeverityT4:
			sawMajor = true
		}
	}
	if sawMajor {
		return "T4 issues require planning - consider creating a dedicated branch"
	}
	return "Focus on one category at a time for best results"
}
// generateTools lists the tooling relevant to the current findings: one
// fixer entry per distinct Type among the open T1 findings, plus the static
// plan and badge descriptors.
//
// Fixers are de-duplicated by finding type (first occurrence wins, input
// order preserved); otherwise a type with many findings would list the same
// fixer many times. The Fixers slice is always non-nil.
func (g *NarrativeGenerator) generateTools(findings []Finding) *NarrativeTools {
	fixers := []interface{}{}
	seen := make(map[string]bool)

	for _, f := range findings {
		if f.Status != StatusOpen || f.Severity != SeverityT1 {
			continue
		}
		if seen[f.Type] {
			continue
		}
		seen[f.Type] = true
		fixers = append(fixers, map[string]string{
			"name":        f.Type,
			"description": fmt.Sprintf("Fix %s issues", f.Type),
		})
	}

	return &NarrativeTools{
		Fixers: fixers,
		Plan: &PlanTool{
			Command:     "devour quality plan",
			Description: "Generate prioritized action plan",
		},
		Badge: &BadgeTool{
			Generated: true,
			InReadme:  false,
			Path:      "scorecard.png",
		},
	}
}
// analyzeDebt summarises accumulated debt: the number of findings marked
// wontfix and the dimension with the highest impact among open findings
// (impact being the sum of Score*Severity).
//
// The worst dimension is selected from a map, whose iteration order is
// random; ties on impact are therefore broken by dimension name so that the
// result is reproducible. A dimension is only reported when its impact is
// greater than zero.
//
// OverallGap is filled with the scorecard's StrictScore and Trend is always
// "stable".
func (g *NarrativeGenerator) analyzeDebt(findings []Finding, scorecard *Scorecard) *NarrativeDebt {
	wontfixCount := 0
	dimensionImpact := make(map[string]float64)
	for _, f := range findings {
		switch f.Status {
		case StatusWontfix:
			wontfixCount++
		case StatusOpen:
			dim := string(g.classifyDimension(f))
			dimensionImpact[dim] += float64(f.Score * int(f.Severity))
		}
	}

	var worstDimension string
	var worstGap float64
	found := false
	for dim, impact := range dimensionImpact {
		// Strictly larger impact always wins; on an exact tie prefer the
		// lexicographically smaller name to stay deterministic.
		if impact > worstGap || (found && impact == worstGap && dim < worstDimension) {
			worstGap, worstDimension, found = impact, dim, true
		}
	}

	return &NarrativeDebt{
		OverallGap:     float64(scorecard.StrictScore),
		WontfixCount:   wontfixCount,
		WorstDimension: worstDimension,
		WorstGap:       worstGap,
		Trend:          "stable",
	}
}
// calculateStrictTarget compares the scorecard's strict score with the
// generator's target. The value stored in Gap is the strict score expressed
// as a percentage of the target, and State is derived from it:
//
//	>= 100  "at_target"
//	>= 80   "near_target"
//	>= 50   "in_progress" (with a warning)
//	else    "needs_work"  (with a warning)
func (g *NarrativeGenerator) calculateStrictTarget(scorecard *Scorecard) *StrictTarget {
	current := float64(scorecard.StrictScore)
	target := float64(g.targetScore)
	pct := current / target * 100

	result := &StrictTarget{
		Target:  target,
		Current: current,
		Gap:     pct,
	}

	if pct >= 100 {
		result.State = "at_target"
	} else if pct >= 80 {
		result.State = "near_target"
	} else if pct >= 50 {
		msg := "Significant gap to target - consider focused effort"
		result.State = "in_progress"
		result.Warning = &msg
	} else {
		msg := "Large gap to target - prioritize high-impact fixes"
		result.State = "needs_work"
		result.Warning = &msg
	}
	return result
}
// generateReminders returns nudges for the user: one when open T1 findings
// exist, and one when the most recent history snapshot recorded exactly as
// many findings as the current scan contains. The result is nil when
// neither applies.
func (g *NarrativeGenerator) generateReminders(findings []Finding, history []StateSnapshot) []string {
	fixable := 0
	for i := range findings {
		if f := &findings[i]; f.Status == StatusOpen && f.Severity == SeverityT1 {
			fixable++
		}
	}

	var reminders []string
	if fixable > 0 {
		reminders = append(reminders, fmt.Sprintf("%d auto-fixable issues available - use 'devour quality fix'", fixable))
	}

	// The newest snapshot is the last element of history.
	if n := len(history); n > 0 && history[n-1].Findings == len(findings) {
		reminders = append(reminders, "No progress since last scan - consider tackling a specific category")
	}
	return reminders
}
// identifyRisks flags two conditions: more than three open T4 findings, and
// a current finding count that exceeds the (integer) average of the last
// three history snapshots by more than 20%. The result is nil when neither
// applies.
func (g *NarrativeGenerator) identifyRisks(findings []Finding, history []StateSnapshot) []string {
	var risks []string

	major := 0
	for i := range findings {
		if f := &findings[i]; f.Status == StatusOpen && f.Severity == SeverityT4 {
			major++
		}
	}
	if major > 3 {
		risks = append(risks, fmt.Sprintf("High number of T4 issues (%d) indicates architectural debt", major))
	}

	// Trend detection needs a full window of snapshots.
	const window = 3
	if n := len(history); n >= window {
		sum := 0
		for _, snap := range history[n-window:] {
			sum += snap.Findings
		}
		avg := sum / window
		if len(findings) > int(float64(avg)*1.2) {
			risks = append(risks, "Finding count is trending upward - debt is accumulating")
		}
	}
	return risks
}
// generateMilestone returns the next goal to aim for in the given phase.
// The scorecard argument is currently not consulted.
//
// For "debt_reduction" the goal is to bring the strict score up to the
// generator's target: calculateStrictTarget treats a strict score at or
// above the target as "at_target", so a higher score is the better one and
// the milestone must ask for an increase, not a reduction.
func (g *NarrativeGenerator) generateMilestone(phase string, scorecard *Scorecard) string {
	switch phase {
	case "maintenance":
		return "Maintain current quality level"
	case "critical":
		return "Reduce T4 issues to zero"
	case "debt_reduction":
		return fmt.Sprintf("Raise strict score to at least %d", g.targetScore)
	case "cleanup":
		return "Clear all T1 and T2 issues"
	default:
		return "Continue quality improvement"
	}
}
// explainWhyNow gives the motivation for acting now. Any open T4 finding
// selects the "architectural decay" message; otherwise more than five open
// T1 findings select the "quick wins" message; otherwise a generic message
// is returned. The phase argument is currently not consulted.
func (g *NarrativeGenerator) explainWhyNow(phase string, findings []Finding) string {
	quickWins := 0
	for i := range findings {
		if findings[i].Status != StatusOpen {
			continue
		}
		switch findings[i].Severity {
		case SeverityT4:
			// A single open T4 outranks everything else.
			return "T4 issues compound over time - addressing them early prevents architectural decay"
		case SeverityT1:
			quickWins++
		}
	}
	if quickWins > 5 {
		return "Quick wins available - auto-fixers can clear low-hanging fruit in minutes"
	}
	return "Consistent small improvements compound into significant quality gains"
}
@@ -0,0 +1,565 @@
package analyzers
import (
"context"
"fmt"
"go/ast"
"go/parser"
"go/token"
"go/types"
"path/filepath"
"strings"
"github.com/yourorg/devour/internal/quality"
"golang.org/x/tools/go/packages"
)
// SingleUseDetector reports functions and types that are referenced exactly
// once across the loaded packages.
type SingleUseDetector struct {
	*quality.BaseDetector
	// minLOC is the minimum length, in lines, a single-use function must
	// have before it is reported.
	minLOC int
}

// NewSingleUseDetector builds a SingleUseDetector on top of finder with a
// minimum function length of 10 lines.
func NewSingleUseDetector(finder quality.FileFinder) *SingleUseDetector {
	base := quality.NewBaseDetector("single_use", quality.SeverityT3, finder)
	return &SingleUseDetector{BaseDetector: base, minLOC: 10}
}

// Name returns the detector identifier, "single_use".
func (d *SingleUseDetector) Name() string { return "single_use" }

// Severity returns the detector's severity tier, T3.
func (d *SingleUseDetector) Severity() quality.Severity { return quality.SeverityT3 }
// Detect loads every package below path and reports functions (of at least
// minLOC lines) and types that are referenced exactly once.
//
// Usage is counted per "<package path>.<name>" key. The receiver type is not
// part of the key, so methods that share a name within one package share a
// counter.
//
// Skipped: functions whose name starts or ends with "Test" or ends with
// "Handler"/"Middleware", entry points (main, init and functions taking an
// http handler-like parameter), and types whose name ends with "Error" or
// "Options".
//
// The context is forwarded to the package loader so that a cancelled scan
// stops the underlying build-system invocation. The config argument is
// currently not consulted.
func (d *SingleUseDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	cfg := &packages.Config{
		Mode:    packages.NeedName | packages.NeedTypes | packages.NeedTypesInfo | packages.NeedFiles | packages.NeedSyntax,
		Dir:     path,
		Context: ctx,
	}

	pkgs, err := packages.Load(cfg, "./...")
	if err != nil {
		return nil, fmt.Errorf("failed to load packages: %w", err)
	}

	callCounts := make(map[string]int)
	funcDefs := make(map[string]FuncDef)
	typeUsages := make(map[string]int)
	typeDefs := make(map[string]TypeDef)

	for _, pkg := range pkgs {
		if pkg.TypesInfo == nil {
			continue
		}

		for _, obj := range pkg.TypesInfo.Uses {
			// Objects from the universe scope have no package. That includes
			// the Error method of the built-in error interface, which is a
			// *types.Func, so the nil check must cover functions as well.
			if obj == nil || obj.Pkg() == nil {
				continue
			}
			key := obj.Pkg().Path() + "." + obj.Name()
			switch obj.(type) {
			case *types.Func:
				callCounts[key]++
			case *types.TypeName:
				typeUsages[key]++
			}
		}

		for _, obj := range pkg.TypesInfo.Defs {
			if obj == nil || obj.Pkg() == nil {
				continue
			}
			key := obj.Pkg().Path() + "." + obj.Name()
			switch obj := obj.(type) {
			case *types.Func:
				pos := pkg.Fset.Position(obj.Pos())
				funcDefs[key] = FuncDef{
					Name:      obj.Name(),
					File:      pos.Filename,
					Line:      pos.Line,
					Package:   obj.Pkg().Path(),
					Exported:  obj.Exported(),
					Signature: obj.Type().String(),
				}
			case *types.TypeName:
				pos := pkg.Fset.Position(obj.Pos())
				typeDefs[key] = TypeDef{
					Name:       obj.Name(),
					File:       pos.Filename,
					Line:       pos.Line,
					Package:    obj.Pkg().Path(),
					Exported:   obj.Exported(),
					Underlying: obj.Type().Underlying().String(),
				}
			}
		}
	}

	entryPoints := d.findEntryPoints(pkgs)

	var findings []quality.Finding

	for key, def := range funcDefs {
		if strings.HasSuffix(def.Name, "Test") || strings.HasPrefix(def.Name, "Test") {
			continue
		}
		if strings.HasSuffix(def.Name, "Handler") || strings.HasSuffix(def.Name, "Middleware") {
			continue
		}

		count := callCounts[key]
		if count != 1 {
			continue
		}
		// findEntryPoints keys its result by package-qualified name, so the
		// lookup has to use key; the bare-name check still covers main/init.
		if entryPoints[key] || d.isEntryPoint(def.Name, entryPoints) {
			continue
		}

		// A file that cannot be re-parsed yields loc == 0, which is below
		// any sensible minLOC, so the function is simply not reported.
		loc, _ := d.getFuncLOC(def.File, def.Line)
		if loc < d.minLOC {
			continue
		}

		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("single_use_func::%s::%s", def.File, def.Name),
			Type:        "single_use",
			Title:       fmt.Sprintf("Single-use function: %s", def.Name),
			Description: fmt.Sprintf("Function '%s' is only used once. Consider inlining it or documenting its purpose.", def.Name),
			File:        def.File,
			Line:        def.Line,
			Severity:    quality.SeverityT3,
			Score:       3,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"name":        def.Name,
				"usage_count": fmt.Sprintf("%d", count),
				"loc":         fmt.Sprintf("%d", loc),
				"exported":    fmt.Sprintf("%v", def.Exported),
			},
		})
	}

	for key, def := range typeDefs {
		if strings.HasSuffix(def.Name, "Error") || strings.HasSuffix(def.Name, "Options") {
			continue
		}

		count := typeUsages[key]
		if count != 1 {
			continue
		}

		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("single_use_type::%s::%s", def.File, def.Name),
			Type:        "single_use",
			Title:       fmt.Sprintf("Single-use type: %s", def.Name),
			Description: fmt.Sprintf("Type '%s' is only used once. Consider if this abstraction is necessary.", def.Name),
			File:        def.File,
			Line:        def.Line,
			Severity:    quality.SeverityT3,
			Score:       4,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"name":        def.Name,
				"usage_count": fmt.Sprintf("%d", count),
				"exported":    fmt.Sprintf("%v", def.Exported),
				"underlying":  def.Underlying,
			},
		})
	}

	return findings, nil
}
// findEntryPoints collects the functions that are treated as program entry
// points: every function declaration named main or init, and every
// non-method function with a parameter of an http handler-like type (see
// isHTTPHandlerType). Keys have the form "<package path>.<function name>".
func (d *SingleUseDetector) findEntryPoints(pkgs []*packages.Package) map[string]bool {
	entryPoints := make(map[string]bool)

	for _, pkg := range pkgs {
		for _, file := range pkg.Syntax {
			// Function declarations only occur at the top level of a file,
			// so scanning the declaration list visits all of them.
			for _, decl := range file.Decls {
				fn, ok := decl.(*ast.FuncDecl)
				if !ok {
					continue
				}

				qualified := pkg.PkgPath + "." + fn.Name.Name
				if fn.Name.Name == "main" || fn.Name.Name == "init" {
					entryPoints[qualified] = true
				}

				if fn.Recv != nil {
					continue
				}
				for _, param := range fn.Type.Params.List {
					if d.isHTTPHandlerType(param.Type) {
						entryPoints[qualified] = true
						break
					}
				}
			}
		}
	}

	return entryPoints
}
// isHTTPHandlerType reports whether expr is, after stripping any number of
// leading pointer stars, a selector of the form http.Handler,
// http.HandlerFunc or http.ResponseWriter. The check is purely syntactic:
// it looks at the identifier "http", not at the imported package.
func (d *SingleUseDetector) isHTTPHandlerType(expr ast.Expr) bool {
	// Peel off pointer indirections first.
	for {
		star, ok := expr.(*ast.StarExpr)
		if !ok {
			break
		}
		expr = star.X
	}

	sel, ok := expr.(*ast.SelectorExpr)
	if !ok {
		return false
	}
	pkgIdent, ok := sel.X.(*ast.Ident)
	if !ok || pkgIdent.Name != "http" {
		return false
	}
	switch sel.Sel.Name {
	case "Handler", "HandlerFunc", "ResponseWriter":
		return true
	}
	return false
}
// isEntryPoint reports whether name is "main" or "init", or is present (and
// true) in entryPoints. The lookup uses name exactly as given.
func (d *SingleUseDetector) isEntryPoint(name string, entryPoints map[string]bool) bool {
	switch name {
	case "main", "init":
		return true
	}
	return entryPoints[name]
}
// getFuncLOC parses file and returns the number of source lines spanned by
// the function declaration whose "func" keyword is on startLine (first and
// last line inclusive). It returns 0 when no declaration starts on that
// line, and the parse error when the file cannot be parsed.
func (d *SingleUseDetector) getFuncLOC(file string, startLine int) (int, error) {
	fset := token.NewFileSet()
	parsed, err := parser.ParseFile(fset, file, nil, 0)
	if err != nil {
		return 0, err
	}

	lines := 0
	for _, decl := range parsed.Decls {
		fn, ok := decl.(*ast.FuncDecl)
		if !ok {
			continue
		}
		first := fset.Position(fn.Pos()).Line
		if first != startLine {
			continue
		}
		// No early exit: should several declarations start on startLine,
		// the last one determines the result.
		lines = fset.Position(fn.End()).Line - first + 1
	}
	return lines, nil
}
// FuncDef describes a function definition recorded by SingleUseDetector.
type FuncDef struct {
// Name is the function's unqualified name.
Name string
// File and Line locate the definition in the source tree.
File string
Line int
// Package is the import path of the defining package.
Package string
// Exported reports whether the name is exported.
Exported bool
// Signature is the string form of the function's type.
Signature string
}
// TypeDef describes a named type definition recorded by SingleUseDetector.
type TypeDef struct {
// Name is the type's unqualified name.
Name string
// File and Line locate the definition in the source tree.
File string
Line int
// Package is the import path of the defining package.
Package string
// Exported reports whether the name is exported.
Exported bool
// Underlying is the string form of the type's underlying type.
Underlying string
}
// CouplingDetector reports packages with unusually many import
// relationships (fan-out, fan-in and hub packages).
type CouplingDetector struct {
	*quality.BaseDetector
	// maxFanOut is the number of imports a package may have before it is
	// reported; the fan-in and hub thresholds are multiples of it.
	maxFanOut int
}

// NewCouplingDetector builds a CouplingDetector on top of finder with a
// fan-out limit of 10.
func NewCouplingDetector(finder quality.FileFinder) *CouplingDetector {
	base := quality.NewBaseDetector("coupling", quality.SeverityT3, finder)
	return &CouplingDetector{BaseDetector: base, maxFanOut: 10}
}

// Name returns the detector identifier, "coupling".
func (d *CouplingDetector) Name() string { return "coupling" }

// Severity returns the detector's severity tier, T3.
func (d *CouplingDetector) Severity() quality.Severity { return quality.SeverityT3 }
// Detect loads every package below path, builds the import graph and
// reports three kinds of coupling problems:
//
//   - fan-out: a package importing more than maxFanOut packages (T3)
//   - fan-in:  a package imported by more than 2*maxFanOut loaded packages (T2)
//   - hubs:    see detectHubPackages (T4)
//
// Fan-in only counts importers that are among the loaded packages. The
// context is forwarded to the package loader so that a cancelled scan stops
// the underlying build-system invocation. The config argument is currently
// not consulted.
func (d *CouplingDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	cfg := &packages.Config{
		Mode:    packages.NeedName | packages.NeedImports | packages.NeedFiles,
		Dir:     path,
		Context: ctx,
	}

	pkgs, err := packages.Load(cfg, "./...")
	if err != nil {
		return nil, fmt.Errorf("failed to load packages: %w", err)
	}

	// pkgImports: package -> packages it imports.
	// pkgImportedBy: package -> loaded packages that import it.
	pkgImports := make(map[string][]string)
	pkgImportedBy := make(map[string][]string)

	for _, pkg := range pkgs {
		for _, imp := range pkg.Imports {
			pkgImports[pkg.PkgPath] = append(pkgImports[pkg.PkgPath], imp.PkgPath)
			pkgImportedBy[imp.PkgPath] = append(pkgImportedBy[imp.PkgPath], pkg.PkgPath)
		}
	}

	var findings []quality.Finding

	for pkg, imports := range pkgImports {
		fanOut := len(imports)
		if fanOut <= d.maxFanOut {
			continue
		}
		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("coupling_fanout::%s", pkg),
			Type:        "coupling",
			Title:       fmt.Sprintf("High fan-out coupling: %s", filepath.Base(pkg)),
			Description: fmt.Sprintf("Package '%s' imports %d packages (max: %d). Consider reducing dependencies.", pkg, fanOut, d.maxFanOut),
			File:        pkg,
			Line:        1,
			Severity:    quality.SeverityT3,
			Score:       fanOut - d.maxFanOut,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"package": pkg,
				"fan_out": fmt.Sprintf("%d", fanOut),
				"imports": strings.Join(imports, ","),
			},
		})
	}

	for pkg, importedBy := range pkgImportedBy {
		fanIn := len(importedBy)
		if fanIn <= d.maxFanOut*2 {
			continue
		}
		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("coupling_fanin::%s", pkg),
			Type:        "coupling",
			Title:       fmt.Sprintf("High fan-in coupling: %s", filepath.Base(pkg)),
			Description: fmt.Sprintf("Package '%s' is imported by %d packages. Ensure it's stable and well-documented.", pkg, fanIn),
			File:        pkg,
			Line:        1,
			Severity:    quality.SeverityT2,
			Score:       fanIn/5 - d.maxFanOut/5,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"package":     pkg,
				"fan_in":      fmt.Sprintf("%d", fanIn),
				"imported_by": strings.Join(importedBy, ","),
			},
		})
	}

	findings = append(findings, d.detectHubPackages(pkgImports, pkgImportedBy)...)

	return findings, nil
}
// detectHubPackages reports packages whose total number of import edges
// (imports plus importers) exceeds 3*maxFanOut. Only packages that appear
// as a key of pkgImports, i.e. that import at least one package, are
// considered.
func (d *CouplingDetector) detectHubPackages(pkgImports, pkgImportedBy map[string][]string) []quality.Finding {
	threshold := d.maxFanOut * 3

	var hubs []quality.Finding
	for pkg, imports := range pkgImports {
		fanOut := len(imports)
		fanIn := len(pkgImportedBy[pkg])
		centrality := fanOut + fanIn
		if centrality <= threshold {
			continue
		}

		hubs = append(hubs, quality.Finding{
			ID:          fmt.Sprintf("coupling_hub::%s", pkg),
			Type:        "coupling",
			Title:       fmt.Sprintf("Hub package detected: %s", filepath.Base(pkg)),
			Description: fmt.Sprintf("Package '%s' is a coupling hub with %d connections (%d imports, %d imported by). Consider splitting.", pkg, centrality, fanOut, fanIn),
			File:        pkg,
			Line:        1,
			Severity:    quality.SeverityT4,
			Score:       centrality / 5,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"package":    pkg,
				"centrality": fmt.Sprintf("%d", centrality),
				"fan_out":    fmt.Sprintf("%d", fanOut),
				"fan_in":     fmt.Sprintf("%d", fanIn),
			},
		})
	}
	return hubs
}
// EnhancedDeadCodeDetector reports exported functions, types and variables
// that are never referenced in the loaded packages.
type EnhancedDeadCodeDetector struct {
	*quality.BaseDetector
}

// NewEnhancedDeadCodeDetector builds an EnhancedDeadCodeDetector on top of
// finder.
func NewEnhancedDeadCodeDetector(finder quality.FileFinder) *EnhancedDeadCodeDetector {
	base := quality.NewBaseDetector("dead_code_enhanced", quality.SeverityT2, finder)
	return &EnhancedDeadCodeDetector{BaseDetector: base}
}

// Name returns the detector identifier, "dead_code_enhanced".
func (d *EnhancedDeadCodeDetector) Name() string { return "dead_code_enhanced" }

// Severity returns the detector's severity tier, T2.
func (d *EnhancedDeadCodeDetector) Severity() quality.Severity { return quality.SeverityT2 }
// Detect loads every package below path and reports exported functions,
// types and variables that are defined but never referenced.
//
// Definitions and references are matched on "<package path>.<name>"; the
// receiver or enclosing type is not part of the key.
//
// Skipped: main/init in package main, names starting with Test, Benchmark
// or Fuzz, types whose name ends with "Error", and anything defined in a
// file whose path contains "_test.go".
//
// Severity and score default to T2/5; definitions under a "/cmd/" directory
// get T3/3, and types always get T3/4.
//
// The context is forwarded to the package loader so that a cancelled scan
// stops the underlying build-system invocation. The config argument is
// currently not consulted.
func (d *EnhancedDeadCodeDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	cfg := &packages.Config{
		Mode:    packages.NeedName | packages.NeedTypes | packages.NeedTypesInfo | packages.NeedFiles | packages.NeedSyntax,
		Dir:     path,
		Context: ctx,
	}

	pkgs, err := packages.Load(cfg, "./...")
	if err != nil {
		return nil, fmt.Errorf("failed to load packages: %w", err)
	}

	used := make(map[string]bool)
	defs := make(map[string]ObjInfo)
	entryPoints := make(map[string]bool)

	for _, pkg := range pkgs {
		if pkg.Name == "main" {
			for _, file := range pkg.Syntax {
				ast.Inspect(file, func(n ast.Node) bool {
					if fn, ok := n.(*ast.FuncDecl); ok {
						if fn.Name.Name == "main" || fn.Name.Name == "init" {
							entryPoints[pkg.PkgPath+"."+fn.Name.Name] = true
						}
					}
					return true
				})
			}
		}

		if pkg.TypesInfo == nil {
			continue
		}

		for _, obj := range pkg.TypesInfo.Uses {
			if obj != nil && obj.Pkg() != nil {
				used[obj.Pkg().Path()+"."+obj.Name()] = true
			}
		}

		for _, obj := range pkg.TypesInfo.Defs {
			if obj == nil || obj.Pkg() == nil {
				continue
			}

			key := obj.Pkg().Path() + "." + obj.Name()
			pos := pkg.Fset.Position(obj.Pos())

			switch o := obj.(type) {
			case *types.Func:
				defs[key] = ObjInfo{
					Name:      obj.Name(),
					Type:      "function",
					File:      pos.Filename,
					Line:      pos.Line,
					Package:   obj.Pkg().Path(),
					Exported:  obj.Exported(),
					Signature: o.Type().String(),
				}
			case *types.TypeName:
				defs[key] = ObjInfo{
					Name:       obj.Name(),
					Type:       "type",
					File:       pos.Filename,
					Line:       pos.Line,
					Package:    obj.Pkg().Path(),
					Exported:   obj.Exported(),
					Underlying: o.Type().Underlying().String(),
				}
			case *types.Var:
				// Only exported variables can be reported, so unexported
				// ones are not recorded at all.
				if obj.Exported() {
					defs[key] = ObjInfo{
						Name:     obj.Name(),
						Type:     "variable",
						File:     pos.Filename,
						Line:     pos.Line,
						Package:  obj.Pkg().Path(),
						Exported: obj.Exported(),
					}
				}
			}
		}
	}

	var findings []quality.Finding

	for key, def := range defs {
		if entryPoints[key] {
			continue
		}
		if strings.HasPrefix(def.Name, "Test") || strings.HasPrefix(def.Name, "Benchmark") || strings.HasPrefix(def.Name, "Fuzz") {
			continue
		}
		if strings.HasSuffix(def.Name, "Error") && def.Type == "type" {
			continue
		}
		if strings.Contains(def.File, "_test.go") {
			continue
		}
		if used[key] || !def.Exported {
			continue
		}

		severity := quality.SeverityT2
		score := 5
		if strings.Contains(def.File, "/cmd/") {
			severity = quality.SeverityT3
			score = 3
		}
		// The type rule is applied last and therefore overrides the
		// "/cmd/" rule for types.
		if def.Type == "type" {
			severity = quality.SeverityT3
			score = 4
		}

		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("dead_code::%s::%s", def.File, def.Name),
			Type:        "dead_code",
			Title:       fmt.Sprintf("Unused exported %s: %s", def.Type, def.Name),
			Description: fmt.Sprintf("The exported %s '%s' is never used. Consider removing it or if it's part of a public API, document it.", def.Type, def.Name),
			File:        def.File,
			Line:        def.Line,
			Severity:    severity,
			Score:       score,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"name":     def.Name,
				"obj_type": def.Type,
				"package":  def.Package,
				"exported": "true",
			},
		})
	}

	return findings, nil
}
// ObjInfo describes a definition recorded by EnhancedDeadCodeDetector.
type ObjInfo struct {
// Name is the object's unqualified name.
Name string
// Type is the kind of object: "function", "type" or "variable".
Type string
// File and Line locate the definition in the source tree.
File string
Line int
// Package is the import path of the defining package.
Package string
// Exported reports whether the name is exported.
Exported bool
// Signature is the string form of the function type; set for functions only.
Signature string
// Underlying is the string form of the underlying type; set for types only.
Underlying string
}
@@ -0,0 +1,304 @@
package analyzers
import (
	"context"
	"fmt"
	"go/parser"
	"go/token"
	"os"
	"sort"
	"strings"

	"github.com/yourorg/devour/internal/quality"
	"golang.org/x/tools/go/packages"
)
// DeadCodeDetector reports exported identifiers that are never referenced
// in the loaded packages.
type DeadCodeDetector struct {
	*quality.BaseDetector
}

// NewDeadCodeDetector builds a DeadCodeDetector on top of finder.
func NewDeadCodeDetector(finder quality.FileFinder) *DeadCodeDetector {
	base := quality.NewBaseDetector("dead_code", quality.SeverityT2, finder)
	return &DeadCodeDetector{BaseDetector: base}
}

// Name returns the detector identifier, "dead_code".
func (d *DeadCodeDetector) Name() string { return "dead_code" }

// Severity returns the detector's severity tier, T2.
func (d *DeadCodeDetector) Severity() quality.Severity { return quality.SeverityT2 }
// Detect loads every package below path and reports each exported
// identifier that is defined but never referenced. Definitions and
// references are matched on "<package path>.<name>". The ctx and config
// arguments are currently not consulted.
func (d *DeadCodeDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	pkgs, err := packages.Load(&packages.Config{
		Mode: packages.NeedName | packages.NeedTypes | packages.NeedTypesInfo | packages.NeedFiles,
		Dir:  path,
	}, "./...")
	if err != nil {
		return nil, fmt.Errorf("failed to load packages: %w", err)
	}

	// Pass 1: collect every referenced object across all packages. This must
	// be complete before any definition is judged, because a definition in
	// one package may only be used from another.
	referenced := make(map[string]bool)
	for _, pkg := range pkgs {
		for _, obj := range pkg.TypesInfo.Uses {
			if obj == nil || obj.Pkg() == nil {
				continue
			}
			referenced[obj.Pkg().Path()+"."+obj.Name()] = true
		}
	}

	// Pass 2: report exported definitions that were never referenced.
	var findings []quality.Finding
	for _, pkg := range pkgs {
		for _, obj := range pkg.TypesInfo.Defs {
			if obj == nil || obj.Pkg() == nil || !obj.Exported() {
				continue
			}
			if referenced[obj.Pkg().Path()+"."+obj.Name()] {
				continue
			}

			pos := pkg.Fset.Position(obj.Pos())
			findings = append(findings, quality.Finding{
				ID:          fmt.Sprintf("dead_code::%s::%s", pos.Filename, obj.Name()),
				Type:        "dead_code",
				Title:       fmt.Sprintf("Unused exported identifier: %s", obj.Name()),
				Description: fmt.Sprintf("The exported %s '%s' is never used in the codebase. Consider removing it or documenting its intended use.", obj.Type(), obj.Name()),
				File:        pos.Filename,
				Line:        pos.Line,
				Severity:    quality.SeverityT2,
				Score:       5,
				Status:      quality.StatusOpen,
				Metadata: map[string]string{
					"name":     obj.Name(),
					"type":     obj.Type().String(),
					"package":  obj.Pkg().Path(),
					"exported": "true",
				},
			})
		}
	}

	return findings, nil
}
// UnusedImportDetector reports imports whose package name does not appear
// to be used in the importing file.
type UnusedImportDetector struct {
	*quality.BaseDetector
}

// NewUnusedImportDetector builds an UnusedImportDetector on top of finder.
func NewUnusedImportDetector(finder quality.FileFinder) *UnusedImportDetector {
	base := quality.NewBaseDetector("unused_import", quality.SeverityT1, finder)
	return &UnusedImportDetector{BaseDetector: base}
}

// Name returns the detector identifier, "unused_import".
func (d *UnusedImportDetector) Name() string { return "unused_import" }

// Severity returns the detector's severity tier, T1.
func (d *UnusedImportDetector) Severity() quality.Severity { return quality.SeverityT1 }
// Detect runs the unused-import analysis over every Go file found below
// path. Files that fail to analyze are skipped rather than aborting the
// scan; only a failure to enumerate the files is returned as an error. The
// ctx and config arguments are currently not consulted.
func (d *UnusedImportDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	goFiles, err := d.FindFiles(path, "go")
	if err != nil {
		return nil, err
	}

	var findings []quality.Finding
	for i := range goFiles {
		// Best effort: a file that cannot be analyzed contributes nothing.
		if perFile, ferr := d.analyzeFile(goFiles[i]); ferr == nil {
			findings = append(findings, perFile...)
		}
	}
	return findings, nil
}
// analyzeFile reports the imports of the Go file at path that look unused.
//
// The check is a textual heuristic, not a type check: an import counts as
// used when "<name>." occurs anywhere after the import declarations, where
// <name> is the import alias or, without an alias, the last element of the
// import path. Blank ("_") and dot (".") imports are never reported.
//
// Only the text after the last import spec is searched. Searching the whole
// file would let the import paths themselves satisfy the check (for
// example, a package named "github" would always match "github.com/...").
//
// The file is read once and the same bytes are used for parsing and for the
// search. A read or parse failure is returned as the error.
func (d *UnusedImportDetector) analyzeFile(path string) ([]quality.Finding, error) {
	content, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}

	fset := token.NewFileSet()
	node, err := parser.ParseFile(fset, path, content, parser.ImportsOnly|parser.ParseComments)
	if err != nil {
		return nil, err
	}

	// Find the byte offset just past the last import spec.
	bodyStart := 0
	for _, imp := range node.Imports {
		if end := fset.Position(imp.End()).Offset; end > bodyStart {
			bodyStart = end
		}
	}
	if bodyStart > len(content) {
		bodyStart = len(content)
	}
	body := string(content[bodyStart:])

	var findings []quality.Finding
	for _, imp := range node.Imports {
		pkgPath := strings.Trim(imp.Path.Value, `"`)

		name := ""
		if imp.Name != nil {
			name = imp.Name.Name
		} else {
			parts := strings.Split(pkgPath, "/")
			name = parts[len(parts)-1]
		}

		if name == "_" || name == "." {
			continue
		}

		if strings.Contains(body, name+".") {
			continue
		}

		pos := fset.Position(imp.Pos())
		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("unused_import::%s::%s", path, pkgPath),
			Type:        "unused_import",
			Title:       fmt.Sprintf("Unused import: %s", pkgPath),
			Description: fmt.Sprintf("The import '%s' is not used in this file. Remove it to clean up the code.", pkgPath),
			File:        path,
			Line:        pos.Line,
			Severity:    quality.SeverityT1,
			Score:       2,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"import_path": pkgPath,
				"alias":       name,
			},
		})
	}

	return findings, nil
}
// CycleDetector reports circular import dependencies between the loaded
// packages.
type CycleDetector struct {
	*quality.BaseDetector
}

// NewCycleDetector builds a CycleDetector on top of finder.
func NewCycleDetector(finder quality.FileFinder) *CycleDetector {
	base := quality.NewBaseDetector("import_cycle", quality.SeverityT4, finder)
	return &CycleDetector{BaseDetector: base}
}

// Name returns the detector identifier, "import_cycle".
func (d *CycleDetector) Name() string { return "import_cycle" }

// Severity returns the detector's severity tier, T4.
func (d *CycleDetector) Severity() quality.Severity { return quality.SeverityT4 }
// Detect loads every package below path, builds the import graph restricted
// to the loaded packages and reports one finding per import cycle.
//
// Each cycle is rotated so that it starts at its lexicographically smallest
// package, and the finding ID is derived from that canonical form. The ID of
// a given cycle therefore does not depend on where the search happened to
// enter it, nor on how many other cycles exist, which keeps IDs stable
// between scans.
//
// The context is forwarded to the package loader so that a cancelled scan
// stops the underlying build-system invocation. The config argument is
// currently not consulted.
func (d *CycleDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	cfg := &packages.Config{
		Mode:    packages.NeedName | packages.NeedImports,
		Dir:     path,
		Context: ctx,
	}

	pkgs, err := packages.Load(cfg, "./...")
	if err != nil {
		return nil, fmt.Errorf("failed to load packages: %w", err)
	}

	localPkgs := make(map[string]bool, len(pkgs))
	for _, pkg := range pkgs {
		localPkgs[pkg.PkgPath] = true
	}

	// Edges to packages outside the loaded set are dropped.
	graph := make(map[string][]string)
	for _, pkg := range pkgs {
		for _, imp := range pkg.Imports {
			if localPkgs[imp.PkgPath] {
				graph[pkg.PkgPath] = append(graph[pkg.PkgPath], imp.PkgPath)
			}
		}
	}

	// canonical returns a copy of cycle rotated to start at its smallest
	// element; the relative order of the packages is preserved.
	canonical := func(cycle []string) []string {
		start := 0
		for i := range cycle {
			if cycle[i] < cycle[start] {
				start = i
			}
		}
		rotated := make([]string, 0, len(cycle))
		rotated = append(rotated, cycle[start:]...)
		return append(rotated, cycle[:start]...)
	}

	var findings []quality.Finding
	seen := make(map[string]bool)
	for _, raw := range d.findCycles(graph) {
		if len(raw) == 0 {
			continue
		}
		cycle := canonical(raw)
		id := "import_cycle::" + strings.Join(cycle, "->")
		if seen[id] {
			continue
		}
		seen[id] = true

		findings = append(findings, quality.Finding{
			ID:          id,
			Type:        "import_cycle",
			Title:       "Import cycle detected",
			Description: fmt.Sprintf("Circular import dependency: %s", strings.Join(cycle, " → ")),
			File:        cycle[0],
			Line:        1,
			Severity:    quality.SeverityT4,
			Score:       20,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"cycle": strings.Join(cycle, ","),
			},
		})
	}

	return findings, nil
}
// findCycles returns the cycles found by a depth-first search over graph,
// an adjacency list keyed by node. Each time the search meets an edge back
// to a node on the current path, the path segment from that node to the
// current node is reported as one cycle (the closing edge back to the first
// element is implied).
//
// Start nodes and neighbours are visited in sorted order, so the same graph
// always yields the same cycles in the same order regardless of map
// iteration order or the order of the adjacency lists. The graph itself is
// not modified.
func (d *CycleDetector) findCycles(graph map[string][]string) [][]string {
	var cycles [][]string
	visited := make(map[string]bool, len(graph))
	onStack := make(map[string]bool)

	var dfs func(node string, path []string)
	dfs = func(node string, path []string) {
		visited[node] = true
		onStack[node] = true
		path = append(path, node)

		// Sort a copy so the caller's adjacency lists stay untouched.
		neighbors := append([]string(nil), graph[node]...)
		sort.Strings(neighbors)

		for _, neighbor := range neighbors {
			switch {
			case !visited[neighbor]:
				dfs(neighbor, path)
			case onStack[neighbor]:
				for i, n := range path {
					if n == neighbor {
						// Copy: path's backing array is reused by later
						// branches of the search.
						cycles = append(cycles, append([]string(nil), path[i:]...))
						break
					}
				}
			}
		}

		onStack[node] = false
	}

	roots := make([]string, 0, len(graph))
	for node := range graph {
		roots = append(roots, node)
	}
	sort.Strings(roots)

	for _, node := range roots {
		if !visited[node] {
			dfs(node, nil)
		}
	}

	return cycles
}
@@ -0,0 +1,500 @@
package analyzers
import (
"context"
"fmt"
"go/ast"
"go/parser"
"go/token"
"os"
"path/filepath"
"strings"
"github.com/yourorg/devour/internal/quality"
)
// LargeFileDetector reports Go files that exceed a line-count limit.
type LargeFileDetector struct {
	*quality.BaseDetector
	// maxLOC is the number of lines a file may have before it is reported.
	maxLOC int
}

// NewLargeFileDetector builds a LargeFileDetector on top of finder with a
// limit of 500 lines.
func NewLargeFileDetector(finder quality.FileFinder) *LargeFileDetector {
	base := quality.NewBaseDetector("large_file", quality.SeverityT3, finder)
	return &LargeFileDetector{BaseDetector: base, maxLOC: 500}
}

// Name returns the detector identifier, "large_file".
func (d *LargeFileDetector) Name() string { return "large_file" }

// Severity returns the detector's severity tier, T3.
func (d *LargeFileDetector) Severity() quality.Severity { return quality.SeverityT3 }
// Detect reports every Go file below path that has more than maxLOC lines.
// The score grows by one for every 50 lines above the limit. Files whose
// lines cannot be counted are skipped; only a failure to enumerate the
// files is returned as an error. The ctx and config arguments are currently
// not consulted.
func (d *LargeFileDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	files, err := d.FindFiles(path, "go")
	if err != nil {
		return nil, err
	}

	var findings []quality.Finding
	for _, file := range files {
		loc, err := countLines(file)
		if err != nil || loc <= d.maxLOC {
			continue
		}

		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("large_file::%s", file),
			Type:        "large_file",
			Title:       fmt.Sprintf("Large file detected: %d lines", loc),
			Description: fmt.Sprintf("File '%s' has %d lines (max: %d). Consider splitting into smaller, focused files.", filepath.Base(file), loc, d.maxLOC),
			File:        file,
			Line:        1,
			Severity:    quality.SeverityT3,
			Score:       (loc - d.maxLOC) / 50,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"loc":     fmt.Sprintf("%d", loc),
				"max_loc": fmt.Sprintf("%d", d.maxLOC),
			},
		})
	}

	return findings, nil
}
// GodStructDetector reports structs that have too many fields or too many
// methods.
type GodStructDetector struct {
	*quality.BaseDetector
	maxFields  int // a struct is reported when it has more fields than this
	maxMethods int // a struct is reported when it has more methods than this
}

// NewGodStructDetector returns a GodStructDetector limited to 15 fields and
// 20 methods per struct. finder is handed to the embedded base detector.
func NewGodStructDetector(finder quality.FileFinder) *GodStructDetector {
	detector := new(GodStructDetector)
	detector.BaseDetector = quality.NewBaseDetector("god_struct", quality.SeverityT3, finder)
	detector.maxFields = 15
	detector.maxMethods = 20
	return detector
}

// Name returns the detector identifier, "god_struct".
func (d *GodStructDetector) Name() string {
	return "god_struct"
}

// Severity returns the severity tier reported for this detector.
func (d *GodStructDetector) Severity() quality.Severity {
	return quality.SeverityT3
}
// Detect runs analyzeFile on every Go file found under path and returns the
// concatenated findings. ctx and config are accepted but not used.
func (d *GodStructDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	goFiles, err := d.FindFiles(path, "go")
	if err != nil {
		return nil, err
	}

	var all []quality.Finding
	for i := range goFiles {
		all = append(all, d.analyzeFile(goFiles[i])...)
	}
	return all, nil
}
// analyzeFile parses the Go file at path and reports structs declared in it
// that have more than d.maxFields fields or more than d.maxMethods methods.
//
// Methods are attributed to a struct through the receiver's type name; the
// receiver variable name plays no role. Only methods declared in this same
// file are counted. Fields are counted per declared name, so `a, b int`
// contributes two fields and an embedded field contributes one.
//
// A file that fails to parse yields no findings.
func (d *GodStructDetector) analyzeFile(path string) []quality.Finding {
	fset := token.NewFileSet()
	node, err := parser.ParseFile(fset, path, nil, 0)
	if err != nil {
		return nil
	}

	// receiverType peels pointer, parenthesis and type-argument wrappers off a
	// receiver type expression and returns the underlying type name, or "" when
	// the expression has an unexpected shape.
	var receiverType func(expr ast.Expr) string
	receiverType = func(expr ast.Expr) string {
		switch t := expr.(type) {
		case *ast.Ident:
			return t.Name
		case *ast.StarExpr:
			return receiverType(t.X)
		case *ast.ParenExpr:
			return receiverType(t.X)
		case *ast.IndexExpr: // generic receiver with one type parameter
			return receiverType(t.X)
		case *ast.IndexListExpr: // generic receiver with several type parameters
			return receiverType(t.X)
		}
		return ""
	}

	// First pass: count methods per receiver type.
	methodCounts := make(map[string]int)
	for _, decl := range node.Decls {
		fn, ok := decl.(*ast.FuncDecl)
		if !ok || fn.Recv == nil {
			continue
		}
		for _, recv := range fn.Recv.List {
			if typeName := receiverType(recv.Type); typeName != "" {
				methodCounts[typeName]++
			}
		}
	}

	// Second pass: inspect every struct type declaration.
	var findings []quality.Finding
	for _, decl := range node.Decls {
		gen, ok := decl.(*ast.GenDecl)
		if !ok || gen.Tok != token.TYPE {
			continue
		}
		for _, spec := range gen.Specs {
			typeSpec, ok := spec.(*ast.TypeSpec)
			if !ok {
				continue
			}
			structType, ok := typeSpec.Type.(*ast.StructType)
			if !ok {
				continue
			}

			fieldCount := 0
			if structType.Fields != nil {
				for _, field := range structType.Fields.List {
					if n := len(field.Names); n > 0 {
						fieldCount += n
					} else {
						fieldCount++ // embedded field
					}
				}
			}
			methodCount := methodCounts[typeSpec.Name.Name]
			line := fset.Position(typeSpec.Pos()).Line

			if fieldCount > d.maxFields {
				findings = append(findings, quality.Finding{
					ID:          fmt.Sprintf("god_struct::%s::%s", path, typeSpec.Name.Name),
					Type:        "god_struct",
					Title:       fmt.Sprintf("God struct detected: %s", typeSpec.Name.Name),
					Description: fmt.Sprintf("Struct '%s' has %d fields (max: %d). Consider breaking it into smaller, focused structs.", typeSpec.Name.Name, fieldCount, d.maxFields),
					File:        path,
					Line:        line,
					Severity:    quality.SeverityT3,
					Score:       (fieldCount - d.maxFields) * 2,
					Status:      quality.StatusOpen,
					Metadata: map[string]string{
						"struct_name": typeSpec.Name.Name,
						"field_count": fmt.Sprintf("%d", fieldCount),
						"max_fields":  fmt.Sprintf("%d", d.maxFields),
					},
				})
			}
			if methodCount > d.maxMethods {
				findings = append(findings, quality.Finding{
					ID:          fmt.Sprintf("god_struct_methods::%s::%s", path, typeSpec.Name.Name),
					Type:        "god_struct",
					Title:       fmt.Sprintf("God struct (methods): %s", typeSpec.Name.Name),
					Description: fmt.Sprintf("Struct '%s' has %d methods (max: %d). Consider splitting responsibilities.", typeSpec.Name.Name, methodCount, d.maxMethods),
					File:        path,
					Line:        line,
					Severity:    quality.SeverityT3,
					Score:       (methodCount - d.maxMethods) * 2,
					Status:      quality.StatusOpen,
					Metadata: map[string]string{
						"struct_name":  typeSpec.Name.Name,
						"method_count": fmt.Sprintf("%d", methodCount),
						"max_methods":  fmt.Sprintf("%d", d.maxMethods),
					},
				})
			}
		}
	}
	return findings
}
// DebugLogDetector reports calls to the standard log package and fmt print
// calls outside command packages.
type DebugLogDetector struct {
	*quality.BaseDetector
}

// NewDebugLogDetector returns a DebugLogDetector. finder is handed to the
// embedded base detector.
func NewDebugLogDetector(finder quality.FileFinder) *DebugLogDetector {
	detector := new(DebugLogDetector)
	detector.BaseDetector = quality.NewBaseDetector("debug_log", quality.SeverityT1, finder)
	return detector
}

// Name returns the detector identifier, "debug_log".
func (d *DebugLogDetector) Name() string {
	return "debug_log"
}

// Severity returns the severity tier reported for this detector.
func (d *DebugLogDetector) Severity() quality.Severity {
	return quality.SeverityT1
}
// Detect runs analyzeFile on every Go file found under path and returns the
// concatenated findings. ctx and config are accepted but not used.
func (d *DebugLogDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	goFiles, err := d.FindFiles(path, "go")
	if err != nil {
		return nil, err
	}

	var all []quality.Finding
	for i := range goFiles {
		all = append(all, d.analyzeFile(goFiles[i])...)
	}
	return all, nil
}
// analyzeFile parses the Go file at path and inspects every call expression.
//
// A call whose rendered callee starts with one of the log.* prefixes is
// reported as a debug log statement, unless path contains "_test.go". A call
// whose callee starts with one of the fmt.Print* prefixes is reported as a
// stray print, unless path contains "/cmd/". A file that fails to parse yields
// no findings.
func (d *DebugLogDetector) analyzeFile(path string) []quality.Finding {
	fset := token.NewFileSet()
	node, err := parser.ParseFile(fset, path, nil, 0)
	if err != nil {
		return nil
	}

	debugPatterns := []string{
		"log.Print",
		"log.Println",
		"log.Printf",
		"log.Fatal",
		"log.Fatalf",
		"log.Fatalln",
	}
	cliPatterns := []string{
		"fmt.Print",
		"fmt.Println",
		"fmt.Printf",
	}

	// Both path checks depend only on path, so evaluate them once.
	inTestFile := strings.Contains(path, "_test.go")
	inCmdPackage := strings.Contains(path, "/cmd/")

	// hasAnyPrefix reports whether call begins with one of prefixes. An exact
	// match is a prefix match too, so no separate equality test is needed.
	hasAnyPrefix := func(call string, prefixes []string) bool {
		for _, prefix := range prefixes {
			if strings.HasPrefix(call, prefix) {
				return true
			}
		}
		return false
	}

	var findings []quality.Finding
	report := func(call *ast.CallExpr, callee, title, description string) {
		line := fset.Position(call.Pos()).Line
		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("debug_log::%s::%d", path, line),
			Type:        "debug_log",
			Title:       title,
			Description: description,
			File:        path,
			Line:        line,
			Severity:    quality.SeverityT1,
			Score:       2,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"call": callee,
			},
		})
	}

	ast.Inspect(node, func(n ast.Node) bool {
		call, isCall := n.(*ast.CallExpr)
		if !isCall {
			return true
		}
		callee := exprToString(call.Fun)

		if hasAnyPrefix(callee, debugPatterns) {
			if inTestFile {
				return true
			}
			report(call, callee,
				"Debug log statement detected",
				fmt.Sprintf("Found '%s' statement. Consider using structured logging instead.", callee))
		}

		if inCmdPackage {
			return true
		}
		if hasAnyPrefix(callee, cliPatterns) {
			report(call, callee,
				"Potential debug print in non-CLI code",
				fmt.Sprintf("Found '%s' in library code. Consider using structured logging or returning errors.", callee))
		}
		return true
	})
	return findings
}
// GodFunctionDetector reports functions that are too long, take too many
// parameters, return too many values or nest too deeply.
type GodFunctionDetector struct {
	*quality.BaseDetector
	maxLOC     int // maximum lines per function
	maxParams  int // maximum number of parameters
	maxReturns int // maximum number of results
	maxNesting int // maximum nesting depth
}

// NewGodFunctionDetector returns a GodFunctionDetector with limits of 50
// lines, 5 parameters, 3 results and a nesting depth of 4. finder is handed to
// the embedded base detector.
func NewGodFunctionDetector(finder quality.FileFinder) *GodFunctionDetector {
	detector := new(GodFunctionDetector)
	detector.BaseDetector = quality.NewBaseDetector("god_function", quality.SeverityT3, finder)
	detector.maxLOC = 50
	detector.maxParams = 5
	detector.maxReturns = 3
	detector.maxNesting = 4
	return detector
}

// Name returns the detector identifier, "god_function".
func (d *GodFunctionDetector) Name() string {
	return "god_function"
}

// Severity returns the severity tier reported for this detector.
func (d *GodFunctionDetector) Severity() quality.Severity {
	return quality.SeverityT3
}
// Detect runs analyzeFile on every Go file found under path and returns the
// concatenated findings. ctx and config are accepted but not used.
func (d *GodFunctionDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	goFiles, err := d.FindFiles(path, "go")
	if err != nil {
		return nil, err
	}

	var all []quality.Finding
	for i := range goFiles {
		all = append(all, d.analyzeFile(goFiles[i])...)
	}
	return all, nil
}
// analyzeFile parses the Go file at path and measures every top-level function
// and method: its length in lines, its parameter count, its result count and
// its nesting depth. One finding is emitted per function that exceeds at least
// one of the detector's limits; the score is three points per exceeded limit.
//
// Parameters and results are both counted per declared name, so `(a, b int)`
// counts as two and an unnamed entry counts as one. A file that fails to parse
// yields no findings.
func (d *GodFunctionDetector) analyzeFile(path string) []quality.Finding {
	fset := token.NewFileSet()
	node, err := parser.ParseFile(fset, path, nil, 0)
	if err != nil {
		return nil
	}

	// countFields returns the number of values a parameter or result list
	// declares.
	countFields := func(list *ast.FieldList) int {
		if list == nil {
			return 0
		}
		total := 0
		for _, field := range list.List {
			if n := len(field.Names); n > 0 {
				total += n
			} else {
				total++ // unnamed parameter or result
			}
		}
		return total
	}

	var findings []quality.Finding
	for _, decl := range node.Decls {
		fn, ok := decl.(*ast.FuncDecl)
		if !ok {
			continue
		}

		startLine := fset.Position(fn.Pos()).Line
		endLine := fset.Position(fn.End()).Line
		loc := endLine - startLine + 1
		paramCount := countFields(fn.Type.Params)
		returnCount := countFields(fn.Type.Results)
		nestingDepth := d.calculateNesting(fn)

		var issues []string
		if loc > d.maxLOC {
			issues = append(issues, fmt.Sprintf("%d lines (max %d)", loc, d.maxLOC))
		}
		if paramCount > d.maxParams {
			issues = append(issues, fmt.Sprintf("%d params (max %d)", paramCount, d.maxParams))
		}
		if returnCount > d.maxReturns {
			issues = append(issues, fmt.Sprintf("%d returns (max %d)", returnCount, d.maxReturns))
		}
		if nestingDepth > d.maxNesting {
			issues = append(issues, fmt.Sprintf("nesting depth %d (max %d)", nestingDepth, d.maxNesting))
		}
		if len(issues) == 0 {
			continue
		}

		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("god_function::%s::%s", path, fn.Name.Name),
			Type:        "god_function",
			Title:       fmt.Sprintf("God function: %s", fn.Name.Name),
			Description: fmt.Sprintf("Function '%s' has issues: %s", fn.Name.Name, strings.Join(issues, ", ")),
			File:        path,
			Line:        startLine,
			Severity:    quality.SeverityT3,
			Score:       len(issues) * 3,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"function":      fn.Name.Name,
				"loc":           fmt.Sprintf("%d", loc),
				"params":        fmt.Sprintf("%d", paramCount),
				"returns":       fmt.Sprintf("%d", returnCount),
				"nesting_depth": fmt.Sprintf("%d", nestingDepth),
			},
		})
	}
	return findings
}
// calculateNesting returns the deepest level of control-flow nesting in fn's
// body. The function body itself is level 0; the body of every if, for, range,
// switch, type switch or select adds one level.
//
// An `else if` is treated as a sibling of the `if` it belongs to, so a flat
// if/else-if chain does not get deeper with every link. Statements inside
// switch and select clauses are visited at the level of the enclosing switch
// or select body. Function literals are not descended into. A declaration
// without a body has depth 0.
func (d *GodFunctionDetector) calculateNesting(fn *ast.FuncDecl) int {
	if fn == nil || fn.Body == nil {
		return 0
	}

	maxDepth := 0
	var visit func(n ast.Node, depth int)
	visit = func(n ast.Node, depth int) {
		if depth > maxDepth {
			maxDepth = depth
		}
		switch stmt := n.(type) {
		case *ast.BlockStmt:
			if stmt == nil {
				return
			}
			for _, s := range stmt.List {
				visit(s, depth)
			}
		case *ast.IfStmt:
			visit(stmt.Body, depth+1)
			switch alt := stmt.Else.(type) {
			case *ast.IfStmt:
				// else-if: same level as the if it continues.
				visit(alt, depth)
			case *ast.BlockStmt:
				visit(alt, depth+1)
			}
		case *ast.ForStmt:
			visit(stmt.Body, depth+1)
		case *ast.RangeStmt:
			visit(stmt.Body, depth+1)
		case *ast.SwitchStmt:
			visit(stmt.Body, depth+1)
		case *ast.TypeSwitchStmt:
			visit(stmt.Body, depth+1)
		case *ast.SelectStmt:
			visit(stmt.Body, depth+1)
		case *ast.CaseClause:
			for _, s := range stmt.Body {
				visit(s, depth)
			}
		case *ast.CommClause:
			for _, s := range stmt.Body {
				visit(s, depth)
			}
		case *ast.LabeledStmt:
			visit(stmt.Stmt, depth)
		}
	}
	visit(fn.Body, 0)
	return maxDepth
}
// exprToString renders an identifier or a chain of selectors (for example
// `pkg.Type.Method`) as dotted text. Any sub-expression that is neither an
// identifier nor a selector contributes the empty string, so `f().x` renders
// as ".x" and an unsupported expression on its own renders as "".
func exprToString(expr ast.Expr) string {
	// Walk from the outermost selector inwards, collecting the trailing
	// ".name" parts until the base expression is reached.
	suffix := ""
	for {
		switch e := expr.(type) {
		case *ast.Ident:
			return e.Name + suffix
		case *ast.SelectorExpr:
			suffix = "." + e.Sel.Name + suffix
			expr = e.X
		default:
			return suffix
		}
	}
}
// countLines returns the number of lines in the file at path.
//
// A trailing newline terminates the last line rather than starting a new one,
// so "a\nb\n" and "a\nb" both count as two lines. An empty file has zero
// lines. The error from reading the file is returned unchanged.
func countLines(path string) (int, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return 0, err
	}
	if len(data) == 0 {
		return 0, nil
	}
	lines := strings.Count(string(data), "\n")
	if data[len(data)-1] != '\n' {
		// The final line has no terminator but still counts.
		lines++
	}
	return lines, nil
}
@@ -0,0 +1,410 @@
package analyzers
import (
	"context"
	"fmt"
	"go/ast"
	"go/parser"
	"go/printer"
	"go/token"
	"regexp"
	"strings"

	"github.com/yourorg/devour/internal/quality"
)
// SecurityDetector reports security smells found through a fixed table of
// regular-expression patterns plus a few AST checks.
type SecurityDetector struct {
	*quality.BaseDetector
	patterns []SecurityPattern
}

// SecurityPattern is one regular-expression rule of the SecurityDetector.
type SecurityPattern struct {
	Name        string           // rule identifier, used as the finding title
	Description string           // human-readable explanation of the rule
	Pattern     *regexp.Regexp   // expression matched against the inspected text
	Severity    quality.Severity // severity assigned to findings of this rule
	Score       int              // score assigned to findings of this rule
}

// NewSecurityDetector returns a SecurityDetector loaded with the built-in
// rule table. finder is handed to the embedded base detector.
func NewSecurityDetector(finder quality.FileFinder) *SecurityDetector {
	base := quality.NewBaseDetector("security", quality.SeverityT3, finder)

	rules := []SecurityPattern{
		{
			Name:        "hardcoded_password",
			Description: "Hardcoded password or secret detected",
			Pattern:     regexp.MustCompile(`(?i)(password|passwd|pwd|secret|api_key|apikey|token)\s*[:=]\s*["'][^"']+["']`),
			Severity:    quality.SeverityT4,
			Score:       30,
		},
		{
			Name:        "sql_injection_risk",
			Description: "Potential SQL injection - string concatenation in query",
			Pattern:     regexp.MustCompile(`fmt\.Sprintf.*SELECT|fmt\.Sprintf.*INSERT|fmt\.Sprintf.*UPDATE|fmt\.Sprintf.*DELETE`),
			Severity:    quality.SeverityT4,
			Score:       25,
		},
		{
			Name:        "unsafe_sql_exec",
			Description: "Direct string interpolation in SQL execution",
			Pattern:     regexp.MustCompile(`db\.(Exec|Query).*\+|db\.(Exec|Query).*fmt\.Sprintf`),
			Severity:    quality.SeverityT4,
			Score:       25,
		},
		{
			Name:        "weak_random",
			Description: "Using math/rand for security-sensitive operations",
			Pattern:     regexp.MustCompile(`math/rand.*token|math/rand.*password|math/rand.*secret|math/rand.*key`),
			Severity:    quality.SeverityT3,
			Score:       15,
		},
		{
			Name:        "todo_security",
			Description: "TODO/FIXME related to security",
			Pattern:     regexp.MustCompile(`(?i)(TODO|FIXME|XXX).*security|(?i)(TODO|FIXME|XXX).*auth|(?i)(TODO|FIXME|XXX).*password`),
			Severity:    quality.SeverityT2,
			Score:       5,
		},
		{
			Name:        "os_exec_shell",
			Description: "Command execution with potential shell injection",
			Pattern:     regexp.MustCompile(`exec\.Command.*sh.*-c|exec\.Command.*bash.*-c`),
			Severity:    quality.SeverityT4,
			Score:       30,
		},
	}

	return &SecurityDetector{
		BaseDetector: base,
		patterns:     rules,
	}
}

// Name returns the detector identifier, "security".
func (d *SecurityDetector) Name() string {
	return "security"
}

// Severity returns the detector-level severity tier; individual findings
// carry the severity of the rule that produced them.
func (d *SecurityDetector) Severity() quality.Severity {
	return quality.SeverityT3
}
// Detect runs analyzeFile on every Go file found under path and returns the
// concatenated findings. Files that fail to analyze are skipped. ctx and
// config are accepted but not used.
func (d *SecurityDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	goFiles, err := d.FindFiles(path, "go")
	if err != nil {
		return nil, err
	}

	var all []quality.Finding
	for i := range goFiles {
		perFile, analyzeErr := d.analyzeFile(goFiles[i])
		if analyzeErr == nil {
			all = append(all, perFile...)
		}
	}
	return all, nil
}
// analyzeFile parses filePath (comments included) and applies the call,
// assignment and value-spec checks to the matching AST nodes, followed by the
// comment check over the whole file. It returns the parse error, if any.
func (d *SecurityDetector) analyzeFile(filePath string) ([]quality.Finding, error) {
	fset := token.NewFileSet()
	file, err := parser.ParseFile(fset, filePath, nil, parser.ParseComments)
	if err != nil {
		return nil, err
	}

	var findings []quality.Finding
	dispatch := func(n ast.Node) bool {
		if call, ok := n.(*ast.CallExpr); ok {
			d.checkCallExpr(call, fset, filePath, &findings)
		} else if assign, ok := n.(*ast.AssignStmt); ok {
			d.checkAssignStmt(assign, fset, filePath, &findings)
		} else if spec, ok := n.(*ast.ValueSpec); ok {
			d.checkValueSpec(spec, fset, filePath, &findings)
		}
		return true // always keep descending
	}
	ast.Inspect(file, dispatch)

	d.checkComments(file, fset, filePath, &findings)
	return findings, nil
}
// checkCallExpr matches the text produced by nodeToString for expr against the
// rule table and appends a finding for the first rule that matches. At most
// one finding is added per call expression.
func (d *SecurityDetector) checkCallExpr(expr *ast.CallExpr, fset *token.FileSet, file string, findings *[]quality.Finding) {
	rendered := d.nodeToString(expr)
	line := fset.Position(expr.Pos()).Line

	for i := range d.patterns {
		rule := d.patterns[i]
		if !rule.Pattern.MatchString(rendered) {
			continue
		}
		*findings = append(*findings, quality.Finding{
			ID:          fmt.Sprintf("security::%s::%d", file, line),
			Type:        "security",
			Title:       rule.Name,
			Description: rule.Description,
			File:        file,
			Line:        line,
			Severity:    rule.Severity,
			Score:       rule.Score,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"pattern":  rule.Name,
				"match":    rendered,
				"severity": fmt.Sprintf("%d", rule.Severity),
			},
		})
		return // first matching rule wins
	}
}
// checkAssignStmt reports assignments that store a string literal in a
// variable whose name suggests a credential (it contains "password",
// "secret", "apikey" or "token", case-insensitively).
//
// Each left-hand identifier is compared only with the right-hand expression at
// the same position, so `user, password := "bob", load()` is not reported just
// because some other value in the statement is a literal. The keyword list
// matches the one used by checkValueSpec.
func (d *SecurityDetector) checkAssignStmt(stmt *ast.AssignStmt, fset *token.FileSet, file string, findings *[]quality.Finding) {
	sensitive := [...]string{"password", "secret", "apikey", "token"}

	for i, lhs := range stmt.Lhs {
		ident, ok := lhs.(*ast.Ident)
		if !ok {
			continue
		}

		lowered := strings.ToLower(ident.Name)
		suspicious := false
		for _, keyword := range sensitive {
			if strings.Contains(lowered, keyword) {
				suspicious = true
				break
			}
		}
		// A multi-value call on the right has no per-name counterpart.
		if !suspicious || i >= len(stmt.Rhs) {
			continue
		}

		lit, ok := stmt.Rhs[i].(*ast.BasicLit)
		if !ok || lit.Kind != token.STRING {
			continue
		}

		line := fset.Position(stmt.Pos()).Line
		*findings = append(*findings, quality.Finding{
			ID:          fmt.Sprintf("security::%s::%d", file, line),
			Type:        "security",
			Title:       "hardcoded_credential",
			Description: fmt.Sprintf("Hardcoded credential in variable '%s'", ident.Name),
			File:        file,
			Line:        line,
			Severity:    quality.SeverityT4,
			Score:       30,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"variable": ident.Name,
			},
		})
	}
}
// checkValueSpec reports var/const declarations that initialize a name
// containing "password", "secret", "apikey" or "token" (case-insensitively)
// with a string literal. Each name is compared with the initializer at the
// same index; names without one are ignored.
func (d *SecurityDetector) checkValueSpec(spec *ast.ValueSpec, fset *token.FileSet, file string, findings *[]quality.Finding) {
	keywords := [...]string{"password", "secret", "apikey", "token"}

	for idx, ident := range spec.Names {
		lowered := strings.ToLower(ident.Name)
		suspicious := false
		for _, keyword := range keywords {
			if strings.Contains(lowered, keyword) {
				suspicious = true
				break
			}
		}
		if !suspicious || idx >= len(spec.Values) {
			continue
		}

		lit, isLit := spec.Values[idx].(*ast.BasicLit)
		if !isLit || lit.Kind != token.STRING {
			continue
		}

		line := fset.Position(spec.Pos()).Line
		*findings = append(*findings, quality.Finding{
			ID:          fmt.Sprintf("security::%s::%d", file, line),
			Type:        "security",
			Title:       "hardcoded_credential",
			Description: fmt.Sprintf("Hardcoded credential in variable '%s'", ident.Name),
			File:        file,
			Line:        line,
			Severity:    quality.SeverityT4,
			Score:       30,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"variable": ident.Name,
			},
		})
	}
}
// checkComments matches the raw text of every comment in node against the
// rule table and appends a finding for the first matching rule of each
// comment. Such findings carry "in_comment": "true" in their metadata.
func (d *SecurityDetector) checkComments(node *ast.File, fset *token.FileSet, file string, findings *[]quality.Finding) {
	// firstMatch returns the first rule whose pattern matches text.
	firstMatch := func(text string) (SecurityPattern, bool) {
		for i := range d.patterns {
			if d.patterns[i].Pattern.MatchString(text) {
				return d.patterns[i], true
			}
		}
		return SecurityPattern{}, false
	}

	for _, group := range node.Comments {
		for _, comment := range group.List {
			rule, matched := firstMatch(comment.Text)
			if !matched {
				continue
			}
			line := fset.Position(comment.Pos()).Line
			*findings = append(*findings, quality.Finding{
				ID:          fmt.Sprintf("security::%s::%d", file, line),
				Type:        "security",
				Title:       rule.Name,
				Description: rule.Description,
				File:        file,
				Line:        line,
				Severity:    rule.Severity,
				Score:       rule.Score,
				Status:      quality.StatusOpen,
				Metadata: map[string]string{
					"pattern":    rule.Name,
					"in_comment": "true",
				},
			})
		}
	}
}
// nodeToString renders node as Go source text so that the textual rule
// patterns can be matched against it. It returns "" when the node cannot be
// printed.
//
// The node is printed with go/printer. Formatting it with fmt would dump the
// node's struct fields (pointers and position offsets) rather than code, and
// no rule could ever match that.
func (d *SecurityDetector) nodeToString(node ast.Node) string {
	var b strings.Builder
	// A fresh FileSet is enough: positions only influence line breaks in the
	// output, not the tokens that get printed.
	if err := printer.Fprint(&b, token.NewFileSet(), node); err != nil {
		return ""
	}
	return b.String()
}
// ComplexityASTDetector reports functions whose cyclomatic complexity or
// nesting depth exceeds a fixed limit.
type ComplexityASTDetector struct {
	*quality.BaseDetector
	maxComplexity int // highest accepted cyclomatic complexity
	maxNesting    int // highest accepted nesting depth
}

// NewComplexityASTDetector returns a ComplexityASTDetector with a complexity
// limit of 15 and a nesting limit of 4. finder is handed to the embedded base
// detector.
func NewComplexityASTDetector(finder quality.FileFinder) *ComplexityASTDetector {
	detector := new(ComplexityASTDetector)
	detector.BaseDetector = quality.NewBaseDetector("complexity_ast", quality.SeverityT2, finder)
	detector.maxComplexity = 15
	detector.maxNesting = 4
	return detector
}

// Name returns the detector identifier, "complexity_ast".
func (d *ComplexityASTDetector) Name() string {
	return "complexity_ast"
}

// Severity returns the detector-level severity tier.
func (d *ComplexityASTDetector) Severity() quality.Severity {
	return quality.SeverityT2
}
// Detect runs analyzeFile on every Go file found under path and returns the
// concatenated findings. Files that fail to analyze are skipped. ctx and
// config are accepted but not used.
func (d *ComplexityASTDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	goFiles, err := d.FindFiles(path, "go")
	if err != nil {
		return nil, err
	}

	var all []quality.Finding
	for i := range goFiles {
		perFile, analyzeErr := d.analyzeFile(goFiles[i])
		if analyzeErr == nil {
			all = append(all, perFile...)
		}
	}
	return all, nil
}
// analyzeFile parses filePath and measures every top-level function and
// method. A function yields a "complexity::" finding when its cyclomatic
// complexity exceeds d.maxComplexity and a "nesting::" finding when its
// nesting depth exceeds d.maxNesting; both may be emitted for the same
// function. It returns the parse error, if any.
func (d *ComplexityASTDetector) analyzeFile(filePath string) ([]quality.Finding, error) {
	fset := token.NewFileSet()
	file, err := parser.ParseFile(fset, filePath, nil, 0)
	if err != nil {
		return nil, err
	}

	var findings []quality.Finding
	for _, decl := range file.Decls {
		fn, isFunc := decl.(*ast.FuncDecl)
		if !isFunc {
			continue
		}

		name := fn.Name.Name
		complexity := d.calculateCyclomaticComplexity(fn)
		nesting := d.calculateNestingDepth(fn)
		line := fset.Position(fn.Pos()).Line

		if complexity > d.maxComplexity {
			findings = append(findings, quality.Finding{
				ID:          fmt.Sprintf("complexity::%s::%s", filePath, name),
				Type:        "complexity",
				Title:       fmt.Sprintf("High cyclomatic complexity in %s", name),
				Description: fmt.Sprintf("Function '%s' has cyclomatic complexity of %d (max: %d). Consider breaking it into smaller functions.", name, complexity, d.maxComplexity),
				File:        filePath,
				Line:        line,
				Severity:    quality.SeverityT2,
				Score:       complexity - d.maxComplexity,
				Status:      quality.StatusOpen,
				Metadata: map[string]string{
					"function":       name,
					"complexity":     fmt.Sprintf("%d", complexity),
					"max_complexity": fmt.Sprintf("%d", d.maxComplexity),
				},
			})
		}

		if nesting > d.maxNesting {
			findings = append(findings, quality.Finding{
				ID:          fmt.Sprintf("nesting::%s::%s", filePath, name),
				Type:        "complexity",
				Title:       fmt.Sprintf("Deep nesting in %s", name),
				Description: fmt.Sprintf("Function '%s' has nesting depth of %d (max: %d). Consider extracting logic into helper functions.", name, nesting, d.maxNesting),
				File:        filePath,
				Line:        line,
				Severity:    quality.SeverityT3,
				Score:       (nesting - d.maxNesting) * 3,
				Status:      quality.StatusOpen,
				Metadata: map[string]string{
					"function":    name,
					"nesting":     fmt.Sprintf("%d", nesting),
					"max_nesting": fmt.Sprintf("%d", d.maxNesting),
				},
			})
		}
	}
	return findings, nil
}
// calculateCyclomaticComplexity returns the cyclomatic complexity of fn: one
// for the function itself plus one for every decision point found in it.
//
// Decision points are if, for and range statements, switch case clauses,
// select communication clauses, and the short-circuit operators && and ||.
// Other binary operators (arithmetic, comparison, ...) do not branch and are
// not counted.
func (d *ComplexityASTDetector) calculateCyclomaticComplexity(fn *ast.FuncDecl) int {
	complexity := 1
	ast.Inspect(fn, func(n ast.Node) bool {
		switch node := n.(type) {
		case *ast.IfStmt, *ast.ForStmt, *ast.RangeStmt:
			complexity++
		case *ast.CaseClause, *ast.CommClause:
			complexity++
		case *ast.BinaryExpr:
			if node.Op == token.LAND || node.Op == token.LOR {
				complexity++
			}
		}
		return true
	})
	return complexity
}
// calculateNestingDepth returns the deepest level of control-flow nesting in
// fn's body. The function body itself is level 0; the body of every if, for,
// range, switch, type switch or select adds one level.
//
// An `else if` is treated as a sibling of the `if` it belongs to, so a flat
// if/else-if chain does not get deeper with every link. Statements inside
// switch and select clauses are visited at the level of the enclosing switch
// or select body. Function literals are not descended into. A declaration
// without a body (for example one implemented in assembly) has depth 0.
func (d *ComplexityASTDetector) calculateNestingDepth(fn *ast.FuncDecl) int {
	if fn == nil || fn.Body == nil {
		// Without this guard a nil *ast.BlockStmt would be dereferenced below.
		return 0
	}

	maxDepth := 0
	var visit func(n ast.Node, depth int)
	visit = func(n ast.Node, depth int) {
		if depth > maxDepth {
			maxDepth = depth
		}
		switch stmt := n.(type) {
		case *ast.BlockStmt:
			if stmt == nil {
				return
			}
			for _, s := range stmt.List {
				visit(s, depth)
			}
		case *ast.IfStmt:
			visit(stmt.Body, depth+1)
			switch alt := stmt.Else.(type) {
			case *ast.IfStmt:
				// else-if: same level as the if it continues.
				visit(alt, depth)
			case *ast.BlockStmt:
				visit(alt, depth+1)
			}
		case *ast.ForStmt:
			visit(stmt.Body, depth+1)
		case *ast.RangeStmt:
			visit(stmt.Body, depth+1)
		case *ast.SwitchStmt:
			visit(stmt.Body, depth+1)
		case *ast.TypeSwitchStmt:
			visit(stmt.Body, depth+1)
		case *ast.SelectStmt:
			visit(stmt.Body, depth+1)
		case *ast.CaseClause:
			for _, s := range stmt.Body {
				visit(s, depth)
			}
		case *ast.CommClause:
			for _, s := range stmt.Body {
				visit(s, depth)
			}
		case *ast.LabeledStmt:
			visit(stmt.Stmt, depth)
		}
	}
	visit(fn.Body, 0)
	return maxDepth
}
@@ -0,0 +1,523 @@
package analyzers
import (
"context"
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"github.com/yourorg/devour/internal/quality"
)
// TestCoverageDetector reports files whose test coverage, as read from a Go
// cover profile, falls below a minimum percentage.
type TestCoverageDetector struct {
	*quality.BaseDetector
	minCoverage float64 // minimum accepted coverage, in percent
}

// NewTestCoverageDetector returns a TestCoverageDetector that requires at
// least 50% coverage. finder is handed to the embedded base detector.
func NewTestCoverageDetector(finder quality.FileFinder) *TestCoverageDetector {
	detector := new(TestCoverageDetector)
	detector.BaseDetector = quality.NewBaseDetector("test_coverage", quality.SeverityT3, finder)
	detector.minCoverage = 50.0
	return detector
}

// Name returns the detector identifier, "test_coverage".
func (d *TestCoverageDetector) Name() string {
	return "test_coverage"
}

// Severity returns the detector-level severity tier.
func (d *TestCoverageDetector) Severity() quality.Severity {
	return quality.SeverityT3
}
// Detect reads <path>/coverage.out and reports files covered below
// d.minCoverage.
//
// When the profile does not exist, `go test -coverprofile=coverage.out
// -covermode=atomic ./...` is run inside path to create it; the command's
// outcome is ignored and only the presence of the profile afterwards matters.
// If the go tool is not on PATH, or no profile exists after the attempt, Detect
// returns no findings and no error.
//
// Files with zero covered lines additionally receive a "no_test_coverage"
// finding, but only when there are at most ten such files. config is accepted
// but not used.
func (d *TestCoverageDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	profilePath := filepath.Join(path, "coverage.out")

	if _, lookErr := exec.LookPath("go"); lookErr != nil {
		return nil, nil
	}

	if _, statErr := os.Stat(profilePath); os.IsNotExist(statErr) {
		goTest := exec.CommandContext(ctx, "go", "test", "-coverprofile=coverage.out", "-covermode=atomic", "./...")
		goTest.Dir = path
		goTest.Run() // result deliberately ignored; the profile check below decides

		if _, statErr := os.Stat(profilePath); os.IsNotExist(statErr) {
			return nil, nil
		}
	}

	coverage, err := d.parseCoverageFile(profilePath)
	if err != nil {
		return nil, err
	}

	var findings []quality.Finding
	uncovered := []string{}
	for name, info := range coverage {
		if info.TotalLines == 0 {
			continue
		}

		percent := float64(info.CoveredLines) / float64(info.TotalLines) * 100
		if percent < d.minCoverage {
			findings = append(findings, quality.Finding{
				ID:          fmt.Sprintf("test_coverage::%s", name),
				Type:        "test_coverage",
				Title:       fmt.Sprintf("Low test coverage: %s (%.1f%%)", filepath.Base(name), percent),
				Description: fmt.Sprintf("File '%s' has only %.1f%% test coverage (minimum: %.1f%%). Add more tests.", name, percent, d.minCoverage),
				File:        name,
				Line:        1,
				Severity:    quality.SeverityT3,
				Score:       int((d.minCoverage - percent) / 10),
				Status:      quality.StatusOpen,
				Metadata: map[string]string{
					"coverage_percent": fmt.Sprintf("%.1f", percent),
					"covered_lines":    fmt.Sprintf("%d", info.CoveredLines),
					"total_lines":      fmt.Sprintf("%d", info.TotalLines),
					"min_coverage":     fmt.Sprintf("%.1f", d.minCoverage),
				},
			})
		}

		if info.CoveredLines == 0 && info.TotalLines > 0 {
			uncovered = append(uncovered, name)
		}
	}

	// Zero-coverage findings are appended after all low-coverage findings and
	// are suppressed entirely when more than ten files qualify.
	if len(uncovered) == 0 || len(uncovered) > 10 {
		return findings, nil
	}
	for _, name := range uncovered {
		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("no_test_coverage::%s", name),
			Type:        "test_coverage",
			Title:       fmt.Sprintf("No test coverage: %s", filepath.Base(name)),
			Description: fmt.Sprintf("File '%s' has 0%% test coverage. Consider adding tests.", name),
			File:        name,
			Line:        1,
			Severity:    quality.SeverityT2,
			Score:       5,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"coverage_percent": "0",
				"total_lines":      fmt.Sprintf("%d", coverage[name].TotalLines),
			},
		})
	}
	return findings, nil
}
// parseCoverageFile reads the cover profile at path and aggregates it per
// file. Each profile entry contributes the number of lines it spans
// (end - start + 1) to the file's TotalLines, and also to CoveredLines when
// its execution count is greater than zero. The "mode:" header, blank lines
// and entries with fewer than three space-separated fields or no colon in the
// first field are ignored.
func (d *TestCoverageDetector) parseCoverageFile(path string) (map[string]CoverageInfo, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}

	perFile := make(map[string]CoverageInfo)
	for _, entry := range strings.Split(string(raw), "\n") {
		if entry == "" || strings.HasPrefix(entry, "mode:") {
			continue
		}

		fields := strings.Split(entry, " ")
		if len(fields) < 3 {
			continue
		}

		// fields[0] looks like "<file>:<range>"; the last colon separates them.
		location := fields[0]
		sep := strings.LastIndex(location, ":")
		if sep == -1 {
			continue
		}
		name, span := location[:sep], location[sep+1:]

		var hits int
		fmt.Sscanf(fields[2], "%d", &hits)

		first, last := d.parseRange(span)
		spanned := last - first + 1

		info := perFile[name]
		info.TotalLines += spanned
		if hits > 0 {
			info.CoveredLines += spanned
		}
		perFile[name] = info
	}
	return perFile, nil
}
// parseRange splits a "<start>,<end>" range and returns the leading integer of
// each half. Anything after the leading integer (such as a ".column" suffix)
// is ignored. Input that does not contain exactly one comma yields 0, 0.
func (d *TestCoverageDetector) parseRange(s string) (start, end int) {
	if strings.Count(s, ",") != 1 {
		return 0, 0
	}
	comma := strings.IndexByte(s, ',')
	fmt.Sscanf(s[:comma], "%d", &start)
	fmt.Sscanf(s[comma+1:], "%d", &end)
	return start, end
}
// CoverageInfo accumulates the coverage totals of one file as computed by
// parseCoverageFile.
type CoverageInfo struct {
// TotalLines is the sum of the line spans of all profile entries for the file.
TotalLines int
// CoveredLines is the same sum restricted to entries with a count above zero.
CoveredLines int
}
// UntestedFuncDetector reports functions that a Go cover profile shows as
// never executed.
type UntestedFuncDetector struct {
	*quality.BaseDetector
}

// NewUntestedFuncDetector returns an UntestedFuncDetector. finder is handed to
// the embedded base detector.
func NewUntestedFuncDetector(finder quality.FileFinder) *UntestedFuncDetector {
	detector := new(UntestedFuncDetector)
	detector.BaseDetector = quality.NewBaseDetector("untested_func", quality.SeverityT2, finder)
	return detector
}

// Name returns the detector identifier, "untested_func".
func (d *UntestedFuncDetector) Name() string {
	return "untested_func"
}

// Severity returns the detector-level severity tier.
func (d *UntestedFuncDetector) Severity() quality.Severity {
	return quality.SeverityT2
}
// Detect reads <path>/coverage.out and reports functions for which the profile
// contains uncovered blocks and no covered block.
//
// Each profile block is attributed to a function through findFuncAtLine. A
// function becomes a candidate when one of its blocks has a count of zero, and
// is dropped again when any block attributed to it has a count above zero
// (for example an executed entry block next to an unexecuted error branch, or
// the same block listed by several merged test runs). Functions named main or
// init and those starting with "Test" are never reported.
//
// Findings are returned in the order their functions first appear in the
// profile. When the profile cannot be read, Detect returns no findings and no
// error. ctx and config are accepted but not used.
func (d *UntestedFuncDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	coverFile := filepath.Join(path, "coverage.out")
	data, err := os.ReadFile(coverFile)
	if err != nil {
		return nil, nil
	}

	type funcKey struct{ file, name string }
	type block struct {
		file  string
		start int
	}

	candidates := make(map[funcKey]UncoveredFunc)
	var order []funcKey               // candidates in first-seen order
	var coveredBlocks []block         // resolved lazily, see below
	candidateFiles := make(map[string]bool)

	for _, line := range strings.Split(string(data), "\n") {
		if line == "" || strings.HasPrefix(line, "mode:") {
			continue
		}
		parts := strings.Fields(line)
		if len(parts) < 3 {
			continue
		}

		fileRange := parts[0]
		colonIdx := strings.LastIndex(fileRange, ":")
		if colonIdx == -1 {
			continue
		}
		file := fileRange[:colonIdx]
		start, _ := d.parseRange(fileRange[colonIdx+1:])

		var count int
		fmt.Sscanf(parts[len(parts)-1], "%d", &count)
		if count > 0 {
			coveredBlocks = append(coveredBlocks, block{file: file, start: start})
			continue
		}

		funcName := d.findFuncAtLine(file, start)
		if funcName == "" {
			continue
		}
		key := funcKey{file: file, name: funcName}
		if _, known := candidates[key]; !known {
			candidates[key] = UncoveredFunc{Name: funcName, Line: start}
			order = append(order, key)
			candidateFiles[file] = true
		}
	}

	// Resolve covered blocks only for files that have candidates, so files
	// without any uncovered function are never read for this step.
	executed := make(map[funcKey]bool)
	for _, b := range coveredBlocks {
		if !candidateFiles[b.file] {
			continue
		}
		if name := d.findFuncAtLine(b.file, b.start); name != "" {
			executed[funcKey{file: b.file, name: name}] = true
		}
	}

	var findings []quality.Finding
	for _, key := range order {
		if executed[key] {
			continue
		}
		fn := candidates[key]
		if strings.HasPrefix(fn.Name, "Test") || fn.Name == "main" || fn.Name == "init" {
			continue
		}
		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("untested_func::%s::%s", key.file, fn.Name),
			Type:        "test_coverage",
			Title:       fmt.Sprintf("Untested function: %s", fn.Name),
			Description: fmt.Sprintf("Function '%s' in %s has no test coverage.", fn.Name, filepath.Base(key.file)),
			File:        key.file,
			Line:        fn.Line,
			Severity:    quality.SeverityT2,
			Score:       3,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"function": fn.Name,
			},
		})
	}
	return findings, nil
}
// parseRange splits a "<start>,<end>" range and returns the leading integer of
// each half. Anything after the leading integer (such as a ".column" suffix)
// is ignored. Input that does not contain exactly one comma yields 0, 0.
func (d *UntestedFuncDetector) parseRange(s string) (start, end int) {
	if strings.Count(s, ",") != 1 {
		return 0, 0
	}
	comma := strings.IndexByte(s, ',')
	fmt.Sscanf(s[:comma], "%d", &start)
	fmt.Sscanf(s[comma+1:], "%d", &end)
	return start, end
}
// findFuncAtLine returns the name of the function or method declared at, or at
// most 20 lines above, the given 1-based line of file. It returns "" when the
// file cannot be read, the line is out of range, or no declaration is found in
// that window.
//
// The search is textual: it looks for a line starting with "func ". For a
// method the receiver is skipped, so `func (d *T) Name()` yields "Name", and
// type parameters are cut off, so `func Map[T any](...)` yields "Map".
func (d *UntestedFuncDetector) findFuncAtLine(file string, line int) string {
	data, err := os.ReadFile(file)
	if err != nil {
		return ""
	}
	lines := strings.Split(string(data), "\n")
	if line < 1 || line > len(lines) {
		return ""
	}

	for i := line - 1; i >= 0 && i >= line-20; i-- {
		decl := strings.TrimSpace(lines[i])
		if !strings.HasPrefix(decl, "func ") {
			continue
		}

		rest := strings.TrimSpace(strings.TrimPrefix(decl, "func "))
		if strings.HasPrefix(rest, "(") {
			// Method declaration: drop the "(recv Type)" part.
			closing := strings.Index(rest, ")")
			if closing < 0 {
				continue
			}
			rest = strings.TrimSpace(rest[closing+1:])
		}

		// The name ends at the parameter list, the type parameter list or any
		// whitespace.
		if end := strings.IndexAny(rest, "([{ \t"); end >= 0 {
			rest = rest[:end]
		}
		if rest != "" {
			return rest
		}
		// No name on this line (e.g. a function literal); keep looking upwards.
	}
	return ""
}
// UncoveredFunc records a function that owns a coverage block with an
// execution count of zero.
type UncoveredFunc struct {
// Name is the function name as returned by findFuncAtLine.
Name string
// Line is the starting line of the uncovered block that led to this record.
Line int
}
// OrphanedFileDetector reports source files that have no matching _test.go
// file although other files in the same directory are tested.
type OrphanedFileDetector struct {
	*quality.BaseDetector
}

// NewOrphanedFileDetector returns an OrphanedFileDetector. finder is handed to
// the embedded base detector.
func NewOrphanedFileDetector(finder quality.FileFinder) *OrphanedFileDetector {
	detector := new(OrphanedFileDetector)
	detector.BaseDetector = quality.NewBaseDetector("orphaned_file", quality.SeverityT3, finder)
	return detector
}

// Name returns the detector identifier, "orphaned_file".
func (d *OrphanedFileDetector) Name() string {
	return "orphaned_file"
}

// Severity returns the detector-level severity tier.
func (d *OrphanedFileDetector) Severity() quality.Severity {
	return quality.SeverityT3
}
// Detect reports Go files under path that have no sibling "<name>_test.go"
// while their directory contains more than one non-test Go file and at least
// one test file.
//
// Test files, files under a cmd directory, and files whose base name starts
// with "main.go" or "doc.go" are never reported. Each directory is listed at
// most once; a directory that cannot be listed is treated as empty, so its
// files are not reported. ctx and config are accepted but not used.
func (d *OrphanedFileDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	files, err := d.FindFiles(path, "go")
	if err != nil {
		return nil, err
	}

	// tested holds the source path each discovered test file corresponds to.
	tested := make(map[string]bool)
	for _, file := range files {
		if strings.HasSuffix(file, "_test.go") {
			base := strings.TrimSuffix(filepath.Base(file), "_test.go")
			tested[filepath.Join(filepath.Dir(file), base+".go")] = true
		}
	}

	// dirCounts caches, per directory, how many source and test files it holds.
	type dirCounts struct{ goCount, testCount int }
	cache := make(map[string]dirCounts)
	countsFor := func(dir string) dirCounts {
		if counts, ok := cache[dir]; ok {
			return counts
		}
		var counts dirCounts
		entries, _ := os.ReadDir(dir) // best effort: an unreadable directory counts as empty
		for _, entry := range entries {
			switch name := entry.Name(); {
			case strings.HasSuffix(name, "_test.go"):
				counts.testCount++
			case strings.HasSuffix(name, ".go"):
				counts.goCount++
			}
		}
		cache[dir] = counts
		return counts
	}

	var findings []quality.Finding
	for _, file := range files {
		if strings.HasSuffix(file, "_test.go") {
			continue
		}
		if strings.Contains(file, "/cmd/") || strings.Contains(file, "\\cmd\\") {
			continue
		}
		base := filepath.Base(file)
		if strings.HasPrefix(base, "main.go") || strings.HasPrefix(base, "doc.go") {
			continue
		}
		if tested[file] {
			continue
		}

		counts := countsFor(filepath.Dir(file))
		if counts.goCount <= 1 || counts.testCount == 0 {
			continue
		}

		findings = append(findings, quality.Finding{
			ID:          fmt.Sprintf("orphaned_file::%s", file),
			Type:        "orphaned_file",
			Title:       fmt.Sprintf("File without dedicated tests: %s", base),
			Description: fmt.Sprintf("File '%s' has no corresponding _test.go file, but sibling files do. Consider adding tests.", file),
			File:        file,
			Line:        1,
			Severity:    quality.SeverityT3,
			Score:       2,
			Status:      quality.StatusOpen,
			Metadata: map[string]string{
				"sibling_tests": fmt.Sprintf("%d", counts.testCount),
				"sibling_go":    fmt.Sprintf("%d", counts.goCount),
			},
		})
	}
	return findings, nil
}
// DeprecatedUsageDetector reports source files that contain one of a fixed
// set of discouraged constructs.
type DeprecatedUsageDetector struct {
	*quality.BaseDetector
}

// NewDeprecatedUsageDetector returns a DeprecatedUsageDetector. finder is
// handed to the embedded base detector.
func NewDeprecatedUsageDetector(finder quality.FileFinder) *DeprecatedUsageDetector {
	detector := new(DeprecatedUsageDetector)
	detector.BaseDetector = quality.NewBaseDetector("deprecated", quality.SeverityT2, finder)
	return detector
}

// Name returns the detector identifier, "deprecated".
func (d *DeprecatedUsageDetector) Name() string {
	return "deprecated"
}

// Severity returns the detector-level severity tier.
func (d *DeprecatedUsageDetector) Severity() quality.Severity {
	return quality.SeverityT2
}
// Detect scans every non-test Go file under path for a fixed list of
// discouraged constructs and emits one finding per (file, construct) pair,
// pointing at the line of the first occurrence.
//
// The match is a plain substring search over the file content, so occurrences
// inside comments and string literals are reported as well. Files that cannot
// be read are skipped. ctx and config are accepted but not used.
func (d *DeprecatedUsageDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	files, err := d.FindFiles(path, "go")
	if err != nil {
		return nil, err
	}

	// NOTE(review): context.WithDeadline is not marked deprecated in the
	// context package, and interface{} remains valid Go (any is an alias).
	// Confirm these two entries are intended as style advice.
	deprecatedPatterns := []struct {
		pattern string
		alt     string
	}{
		{"io/ioutil", "io and os packages"},
		{"context.WithDeadline", "context.WithTimeout for relative times"},
		{"interface{}", "any"},
	}

	var findings []quality.Finding
	for _, file := range files {
		if strings.HasSuffix(file, "_test.go") {
			continue
		}
		data, err := os.ReadFile(file)
		if err != nil {
			continue
		}
		content := string(data)

		for _, p := range deprecatedPatterns {
			offset := strings.Index(content, p.pattern)
			if offset < 0 {
				continue
			}
			// 1-based line of the first occurrence.
			line := strings.Count(content[:offset], "\n") + 1

			findings = append(findings, quality.Finding{
				ID:          fmt.Sprintf("deprecated::%s::%s", file, p.pattern),
				Type:        "deprecated",
				Title:       fmt.Sprintf("Deprecated usage: %s", p.pattern),
				Description: fmt.Sprintf("Found deprecated '%s'. Use %s instead.", p.pattern, p.alt),
				File:        file,
				Line:        line,
				Severity:    quality.SeverityT2,
				Score:       3,
				Status:      quality.StatusOpen,
				Metadata: map[string]string{
					"deprecated":  p.pattern,
					"alternative": p.alt,
				},
			})
		}
	}
	return findings, nil
}
// ParseGoTestJSON extracts pass/fail results from newline-delimited JSON test
// events. Every line is decoded independently; blank lines and lines that are
// not valid JSON are skipped, and events whose action is neither "pass" nor
// "fail" are dropped. The returned error is always nil.
func ParseGoTestJSON(output []byte) ([]TestResult, error) {
	var results []TestResult
	for _, raw := range strings.Split(string(output), "\n") {
		if raw == "" {
			continue
		}

		var event TestEvent
		if json.Unmarshal([]byte(raw), &event) != nil {
			continue
		}

		switch event.Action {
		case "pass", "fail":
			results = append(results, TestResult{
				Package: event.Package,
				Test:    event.Test,
				Elapsed: event.Elapsed,
				Action:  event.Action,
			})
		}
	}
	return results, nil
}

// TestEvent is one decoded JSON event line.
type TestEvent struct {
	Time    string  `json:"Time"`
	Action  string  `json:"Action"`
	Package string  `json:"Package"`
	Test    string  `json:"Test"`
	Elapsed float64 `json:"Elapsed"`
	Output  string  `json:"Output"`
}

// TestResult is the outcome of one pass or fail event.
type TestResult struct {
	Package string
	Test    string
	Elapsed float64
	Action  string
}
@@ -0,0 +1,276 @@
package fixers
import (
"context"
"fmt"
"go/ast"
"go/format"
"go/parser"
"go/token"
"os"
"strings"
"github.com/yourorg/devour/internal/quality"
"github.com/yourorg/devour/internal/quality/plugins"
)
// DeadCodeFixer is the fixer registered under the name "dead_code".
type DeadCodeFixer struct{}

// NewDeadCodeFixer returns a ready-to-use DeadCodeFixer.
func NewDeadCodeFixer() *DeadCodeFixer {
	return new(DeadCodeFixer)
}

// Name returns the fixer identifier, "dead_code".
func (f *DeadCodeFixer) Name() string {
	const id = "dead_code"
	return id
}

// Description returns a one-line, human-readable summary of the fixer.
func (f *DeadCodeFixer) Description() string {
	const summary = "Comments out or removes unused exported functions/types"
	return summary
}
// CanFix reports whether finding is a "dead_code" finding of severity T1,
// the only kind this fixer handles.
func (f *DeadCodeFixer) CanFix(finding quality.Finding) bool {
	if finding.Type != "dead_code" {
		return false
	}
	return finding.Severity == quality.SeverityT1
}
// Fix marks the declaration named by finding.Metadata["name"] as unused by
// inserting a "// DEPRECATED: ..." comment line directly above it in
// finding.File and rewriting the file in gofmt style.
//
// The name is matched against top-level function/method declarations and
// against type specs; the first match in file order wins. When nothing matches,
// the file is left untouched and a result with Success=false is returned (this
// also applies to dry runs). With dryRun set nothing is written. An error is
// returned when the name is missing from the metadata or when reading, parsing,
// formatting or writing the file fails.
func (f *DeadCodeFixer) Fix(ctx context.Context, finding quality.Finding, dryRun bool) (*plugins.FixResult, error) {
	name := finding.Metadata["name"]
	if name == "" {
		return nil, fmt.Errorf("no function/type name in metadata")
	}

	src, err := os.ReadFile(finding.File)
	if err != nil {
		return nil, fmt.Errorf("read error: %w", err)
	}
	fset := token.NewFileSet()
	node, err := parser.ParseFile(fset, finding.File, src, parser.ParseComments)
	if err != nil {
		return nil, fmt.Errorf("parse error: %w", err)
	}

	// Locate the declaration. A type switch is used so that a matching GenDecl
	// is never force-asserted to *ast.FuncDecl (which would panic).
	var (
		target ast.Decl
		doc    *ast.CommentGroup
	)
search:
	for _, decl := range node.Decls {
		switch d := decl.(type) {
		case *ast.FuncDecl:
			if d.Name.Name == name {
				target, doc = d, d.Doc
				break search
			}
		case *ast.GenDecl:
			for _, spec := range d.Specs {
				if ts, ok := spec.(*ast.TypeSpec); ok && ts.Name.Name == name {
					target, doc = d, d.Doc
					break search
				}
			}
		}
	}
	if target == nil {
		return &plugins.FixResult{
			Success: false,
			Message: fmt.Sprintf("Could not find '%s' in file", name),
		}, nil
	}

	if dryRun {
		return &plugins.FixResult{
			Success: true,
			Message: fmt.Sprintf("Would comment out unused '%s' in %s", name, finding.File),
		}, nil
	}

	const marker = "DEPRECATED: This code is unused and should be removed"
	// Text is safe on a nil comment group and returns "".
	if strings.Contains(doc.Text(), marker) {
		return &plugins.FixResult{
			Success: true,
			Message: fmt.Sprintf("'%s' is already marked as deprecated in %s", name, finding.File),
		}, nil
	}

	// Insert the marker as source text right before the declaration keyword.
	// Assigning a new Doc group on the AST node is not enough: when the parsed
	// file carries a comment list, the printer takes comments from that list
	// and ignores groups that are only reachable through Doc.
	offset := fset.Position(target.Pos()).Offset
	updated := make([]byte, 0, len(src)+len(marker)+4)
	updated = append(updated, src[:offset]...)
	updated = append(updated, "// "+marker+"\n"...)
	updated = append(updated, src[offset:]...)

	formatted, err := format.Source(updated)
	if err != nil {
		return nil, fmt.Errorf("format error: %w", err)
	}
	if err := os.WriteFile(finding.File, formatted, 0644); err != nil {
		return nil, fmt.Errorf("write error: %w", err)
	}

	return &plugins.FixResult{
		Success: true,
		Message: fmt.Sprintf("Marked '%s' as deprecated in %s", name, finding.File),
	}, nil
}
// ComplexityHintFixer is the stateless auto-fixer registered under the name
// "complexity_hint".
type ComplexityHintFixer struct{}

// NewComplexityHintFixer returns a ready-to-use ComplexityHintFixer.
func NewComplexityHintFixer() *ComplexityHintFixer {
	return new(ComplexityHintFixer)
}

// Name returns the identifier of this fixer.
func (f *ComplexityHintFixer) Name() string {
	const name = "complexity_hint"
	return name
}

// Description returns a one-line, human-readable summary of this fixer.
func (f *ComplexityHintFixer) Description() string {
	const summary = "Adds complexity warning comments to complex functions"
	return summary
}
// CanFix reports whether finding has type "complexity" or "complexity_ast".
func (f *ComplexityHintFixer) CanFix(finding quality.Finding) bool {
	switch finding.Type {
	case "complexity", "complexity_ast":
		return true
	}
	return false
}
// Fix inserts a "// FIXME: High complexity (...)" comment line directly above
// the function named by finding.Metadata["function"] and rewrites finding.File
// in gofmt style. The value in parentheses is finding.Metadata["complexity"].
//
// The first top-level function or method with a matching name is annotated.
// When no function matches, the file is left untouched and a result with
// Success=false is returned (this also applies to dry runs). With dryRun set
// nothing is written. An error is returned when the function name is missing
// from the metadata or when reading, parsing, formatting or writing fails.
func (f *ComplexityHintFixer) Fix(ctx context.Context, finding quality.Finding, dryRun bool) (*plugins.FixResult, error) {
	funcName := finding.Metadata["function"]
	if funcName == "" {
		return nil, fmt.Errorf("no function name in metadata")
	}

	src, err := os.ReadFile(finding.File)
	if err != nil {
		return nil, fmt.Errorf("read error: %w", err)
	}
	fset := token.NewFileSet()
	node, err := parser.ParseFile(fset, finding.File, src, parser.ParseComments)
	if err != nil {
		return nil, fmt.Errorf("parse error: %w", err)
	}

	var target *ast.FuncDecl
	for _, decl := range node.Decls {
		if fn, ok := decl.(*ast.FuncDecl); ok && fn.Name.Name == funcName {
			target = fn
			break
		}
	}
	if target == nil {
		// Previously the file was rewritten and success reported even though
		// nothing had been annotated.
		return &plugins.FixResult{
			Success: false,
			Message: fmt.Sprintf("Could not find function '%s' in file", funcName),
		}, nil
	}

	if dryRun {
		return &plugins.FixResult{
			Success: true,
			Message: fmt.Sprintf("Would add complexity warning to '%s' in %s", funcName, finding.File),
		}, nil
	}

	// Text is safe on a nil comment group and returns "".
	if strings.Contains(target.Doc.Text(), "FIXME: High complexity (") {
		return &plugins.FixResult{
			Success: true,
			Message: fmt.Sprintf("'%s' already carries a complexity warning in %s", funcName, finding.File),
		}, nil
	}

	warning := fmt.Sprintf("// FIXME: High complexity (%s). Consider breaking into smaller functions.\n", finding.Metadata["complexity"])

	// Insert the warning as source text right before the "func" keyword.
	// Assigning a new Doc group on the AST node is not enough: when the parsed
	// file carries a comment list, the printer takes comments from that list
	// and ignores groups that are only reachable through Doc.
	offset := fset.Position(target.Pos()).Offset
	updated := make([]byte, 0, len(src)+len(warning))
	updated = append(updated, src[:offset]...)
	updated = append(updated, warning...)
	updated = append(updated, src[offset:]...)

	formatted, err := format.Source(updated)
	if err != nil {
		return nil, fmt.Errorf("format error: %w", err)
	}
	if err := os.WriteFile(finding.File, formatted, 0644); err != nil {
		return nil, fmt.Errorf("write error: %w", err)
	}

	return &plugins.FixResult{
		Success: true,
		Message: fmt.Sprintf("Added complexity warning to '%s' in %s", funcName, finding.File),
	}, nil
}
// IoutilFixer is the stateless auto-fixer registered under the name "ioutil".
type IoutilFixer struct{}

// NewIoutilFixer returns a ready-to-use IoutilFixer.
func NewIoutilFixer() *IoutilFixer {
	return new(IoutilFixer)
}

// Name returns the identifier of this fixer.
func (f *IoutilFixer) Name() string {
	const name = "ioutil"
	return name
}

// Description returns a one-line, human-readable summary of this fixer.
func (f *IoutilFixer) Description() string {
	const summary = "Replaces deprecated io/ioutil with modern equivalents"
	return summary
}
// CanFix reports whether finding is a "deprecated" finding whose title
// mentions "io/ioutil".
func (f *IoutilFixer) CanFix(finding quality.Finding) bool {
	if finding.Type != "deprecated" {
		return false
	}
	return strings.Contains(finding.Title, "io/ioutil")
}
// Fix rewrites finding.File so that it no longer depends on io/ioutil.
//
// Call sites are replaced textually (ioutil.ReadFile -> os.ReadFile,
// ioutil.TempDir -> os.MkdirTemp, ioutil.ReadAll -> io.ReadAll, ...). The
// io/ioutil import itself is replaced in place by whichever of "io" and "os"
// the rewritten code needs and the file does not already import by its default
// name, so new imports always land inside the import section. The result is
// run through gofmt formatting before it is written; if it does not parse, an
// error is returned and the file is left untouched.
//
// A result with Success=false is returned, without modifying the file, when
// the file has no io/ioutil import or imports it under an explicit name
// (alias, "_" or "."), because the textual call-site rewrite only recognises
// the default "ioutil." qualifier. With dryRun set nothing is written.
func (f *IoutilFixer) Fix(ctx context.Context, finding quality.Finding, dryRun bool) (*plugins.FixResult, error) {
	data, err := os.ReadFile(finding.File)
	if err != nil {
		return nil, fmt.Errorf("read error: %w", err)
	}

	// Only the import section is needed to find the io/ioutil import and to
	// learn which replacement packages are already available.
	fset := token.NewFileSet()
	file, err := parser.ParseFile(fset, finding.File, data, parser.ImportsOnly)
	if err != nil {
		return nil, fmt.Errorf("parse error: %w", err)
	}

	var (
		ioutilDecl *ast.GenDecl
		ioutilSpec *ast.ImportSpec
	)
	imported := make(map[string]bool) // packages imported under their default name
	for _, decl := range file.Decls {
		gen, ok := decl.(*ast.GenDecl)
		if !ok || gen.Tok != token.IMPORT {
			continue
		}
		for _, spec := range gen.Specs {
			imp, ok := spec.(*ast.ImportSpec)
			if !ok {
				continue
			}
			path := strings.Trim(imp.Path.Value, "\"`")
			switch {
			case path == "io/ioutil" && ioutilSpec == nil:
				ioutilDecl, ioutilSpec = gen, imp
			case imp.Name == nil:
				imported[path] = true
			}
		}
	}

	if ioutilSpec == nil {
		return &plugins.FixResult{
			Success: false,
			Message: fmt.Sprintf("No io/ioutil import found in %s", finding.File),
		}, nil
	}
	if ioutilSpec.Name != nil {
		return &plugins.FixResult{
			Success: false,
			Message: fmt.Sprintf("io/ioutil is imported as '%s' in %s; manual replacement required", ioutilSpec.Name.Name, finding.File),
		}, nil
	}

	if dryRun {
		return &plugins.FixResult{
			Success: true,
			Message: fmt.Sprintf("Would replace io/ioutil usage in %s", finding.File),
		}, nil
	}

	replacements := []struct{ old, pkg, name string }{
		{"ioutil.ReadFile", "os", "ReadFile"},
		{"ioutil.WriteFile", "os", "WriteFile"},
		{"ioutil.ReadDir", "os", "ReadDir"},
		{"ioutil.TempDir", "os", "MkdirTemp"},
		{"ioutil.TempFile", "os", "CreateTemp"},
		{"ioutil.NopCloser", "io", "NopCloser"},
		{"ioutil.ReadAll", "io", "ReadAll"},
		{"ioutil.Discard", "io", "Discard"},
	}

	content := string(data)
	needed := make(map[string]bool)
	var warnings []string
	for _, r := range replacements {
		if !strings.Contains(content, r.old) {
			continue
		}
		needed[r.pkg] = true
		if r.old == "ioutil.ReadDir" {
			// Not a drop-in replacement: the element type of the result differs.
			warnings = append(warnings, "ioutil.ReadDir was replaced with os.ReadDir, which returns []os.DirEntry instead of []os.FileInfo; review the call sites")
		}
	}

	var missing []string
	for _, pkg := range []string{"io", "os"} {
		if needed[pkg] && !imported[pkg] {
			missing = append(missing, `"`+pkg+`"`)
		}
	}

	// Swap the io/ioutil import for the missing packages. Inside a
	// parenthesised group only the spec is replaced; a standalone
	// `import "io/ioutil"` is replaced as a whole so that no dangling
	// "import" keyword is left behind when nothing needs to be added.
	start, end := ioutilSpec.Pos(), ioutilSpec.End()
	if !ioutilDecl.Lparen.IsValid() {
		start, end = ioutilDecl.Pos(), ioutilDecl.End()
		for i := range missing {
			missing[i] = "import " + missing[i]
		}
	}
	content = content[:fset.Position(start).Offset] +
		strings.Join(missing, "\n") +
		content[fset.Position(end).Offset:]

	for _, r := range replacements {
		content = strings.ReplaceAll(content, r.old, r.pkg+"."+r.name)
	}

	formatted, err := format.Source([]byte(content))
	if err != nil {
		return nil, fmt.Errorf("format error: %w", err)
	}
	if err := os.WriteFile(finding.File, formatted, 0644); err != nil {
		return nil, fmt.Errorf("write error: %w", err)
	}

	return &plugins.FixResult{
		Success:  true,
		Message:  fmt.Sprintf("Replaced io/ioutil in %s", finding.File),
		Warnings: warnings,
	}, nil
}
// DocCommentFixer is the stateless fixer registered under the name
// "doc_comment".
type DocCommentFixer struct{}

// NewDocCommentFixer returns a ready-to-use DocCommentFixer.
func NewDocCommentFixer() *DocCommentFixer {
	return new(DocCommentFixer)
}

// Name returns the identifier of this fixer.
func (f *DocCommentFixer) Name() string {
	const name = "doc_comment"
	return name
}

// Description returns a one-line, human-readable summary of this fixer.
func (f *DocCommentFixer) Description() string {
	const summary = "Adds TODO comments for missing documentation on exported items"
	return summary
}
// CanFix reports whether finding has type "naming", "god_struct" or
// "god_function".
func (f *DocCommentFixer) CanFix(finding quality.Finding) bool {
	switch finding.Type {
	case "naming", "god_struct", "god_function":
		return true
	}
	return false
}
// Fix never modifies any file. It always returns an unsuccessful result whose
// warnings name the finding's title and its file:line location, so that a
// person can add the documentation by hand. ctx and dryRun are not used.
func (f *DocCommentFixer) Fix(ctx context.Context, finding quality.Finding, dryRun bool) (*plugins.FixResult, error) {
	what := fmt.Sprintf("Add documentation for: %s", finding.Title)
	where := fmt.Sprintf("Location: %s:%d", finding.File, finding.Line)

	result := &plugins.FixResult{
		Success:  false,
		Message:  "Documentation fixer requires manual intervention",
		Warnings: []string{what, where},
	}
	return result, nil
}
@@ -0,0 +1,124 @@
package fixers
import (
"context"
"fmt"
"go/ast"
"go/format"
"go/parser"
"go/token"
"os"
"strings"
"github.com/yourorg/devour/internal/quality"
"github.com/yourorg/devour/internal/quality/plugins"
)
// UnusedImportFixer is the stateless auto-fixer registered under the name
// "unused_import".
type UnusedImportFixer struct{}

// NewUnusedImportFixer returns a ready-to-use UnusedImportFixer.
func NewUnusedImportFixer() *UnusedImportFixer {
	return new(UnusedImportFixer)
}

// Name returns the identifier of this fixer.
func (f *UnusedImportFixer) Name() string {
	const name = "unused_import"
	return name
}

// Description returns a one-line, human-readable summary of this fixer.
func (f *UnusedImportFixer) Description() string {
	const summary = "Removes unused import statements"
	return summary
}
// CanFix reports whether finding has type "unused_import".
func (f *UnusedImportFixer) CanFix(finding quality.Finding) bool {
	const fixable = "unused_import"
	return finding.Type == fixable
}
// Fix removes every import of finding.Metadata["import_path"] from
// finding.File and rewrites the file in gofmt style.
//
// The import spec is removed from its import declaration (a declaration left
// without specs is dropped entirely) together with the comments attached to
// it. When the file does not import the path, nothing is written and a result
// with Success=false is returned. With dryRun set nothing is written. An error
// is returned when the file cannot be parsed, the metadata lacks import_path,
// or formatting/writing fails.
func (f *UnusedImportFixer) Fix(ctx context.Context, finding quality.Finding, dryRun bool) (*plugins.FixResult, error) {
	fset := token.NewFileSet()
	node, err := parser.ParseFile(fset, finding.File, nil, parser.ParseComments)
	if err != nil {
		return nil, fmt.Errorf("parse error: %w", err)
	}

	importToRemove := finding.Metadata["import_path"]
	if importToRemove == "" {
		return nil, fmt.Errorf("no import_path in finding metadata")
	}

	// The printer renders node.Decls, not node.Imports, so the spec has to be
	// taken out of the import declaration that contains it.
	removed := false
	droppedComments := make(map[*ast.CommentGroup]bool)
	keptDecls := make([]ast.Decl, 0, len(node.Decls))
	for _, decl := range node.Decls {
		gen, ok := decl.(*ast.GenDecl)
		if !ok || gen.Tok != token.IMPORT {
			keptDecls = append(keptDecls, decl)
			continue
		}

		keptSpecs := make([]ast.Spec, 0, len(gen.Specs))
		for _, spec := range gen.Specs {
			imp, ok := spec.(*ast.ImportSpec)
			if ok && strings.Trim(imp.Path.Value, "\"`") == importToRemove {
				removed = true
				droppedComments[imp.Doc] = true
				droppedComments[imp.Comment] = true
				continue
			}
			keptSpecs = append(keptSpecs, spec)
		}

		if len(keptSpecs) == 0 && len(gen.Specs) > 0 {
			// Every spec was removed: drop the now-empty declaration as well.
			droppedComments[gen.Doc] = true
			continue
		}
		gen.Specs = keptSpecs
		keptDecls = append(keptDecls, gen)
	}

	if !removed {
		return &plugins.FixResult{
			Success: false,
			Message: fmt.Sprintf("Import '%s' not found in %s", importToRemove, finding.File),
		}, nil
	}

	if dryRun {
		return &plugins.FixResult{
			Success: true,
			Message: fmt.Sprintf("Would remove import '%s' from %s", importToRemove, finding.File),
		}, nil
	}

	node.Decls = keptDecls

	// Keep the flat import list consistent with the declarations.
	keptImports := make([]*ast.ImportSpec, 0, len(node.Imports))
	for _, imp := range node.Imports {
		if strings.Trim(imp.Path.Value, "\"`") != importToRemove {
			keptImports = append(keptImports, imp)
		}
	}
	node.Imports = keptImports

	// Comments that belonged to removed specs would otherwise be printed as
	// free-floating comments.
	keptComments := node.Comments[:0]
	for _, group := range node.Comments {
		if !droppedComments[group] {
			keptComments = append(keptComments, group)
		}
	}
	node.Comments = keptComments

	var output strings.Builder
	if err := format.Node(&output, fset, node); err != nil {
		return nil, fmt.Errorf("format error: %w", err)
	}
	if err := os.WriteFile(finding.File, []byte(output.String()), 0644); err != nil {
		return nil, fmt.Errorf("write error: %w", err)
	}

	return &plugins.FixResult{
		Success: true,
		Message: fmt.Sprintf("Removed unused import '%s' from %s", importToRemove, finding.File),
	}, nil
}
// FormattingFixer is the stateless auto-fixer registered under the name
// "format".
type FormattingFixer struct{}

// NewFormattingFixer returns a ready-to-use FormattingFixer.
func NewFormattingFixer() *FormattingFixer {
	return new(FormattingFixer)
}

// Name returns the identifier of this fixer.
func (f *FormattingFixer) Name() string {
	const name = "format"
	return name
}

// Description returns a one-line, human-readable summary of this fixer.
func (f *FormattingFixer) Description() string {
	const summary = "Formats Go source files using gofmt style"
	return summary
}
// CanFix reports whether finding has type "formatting" or "style".
func (f *FormattingFixer) CanFix(finding quality.Finding) bool {
	switch finding.Type {
	case "formatting", "style":
		return true
	}
	return false
}
// Fix parses finding.File and writes it back as rendered by format.Node.
//
// With dryRun set the file is still parsed (so syntax errors are reported) but
// nothing is written. Parse, format and write failures are returned as wrapped
// errors.
func (f *FormattingFixer) Fix(ctx context.Context, finding quality.Finding, dryRun bool) (*plugins.FixResult, error) {
	target := finding.File

	fset := token.NewFileSet()
	parsed, err := parser.ParseFile(fset, target, nil, parser.ParseComments)
	if err != nil {
		return nil, fmt.Errorf("parse error: %w", err)
	}

	if dryRun {
		preview := &plugins.FixResult{
			Success: true,
			Message: fmt.Sprintf("Would format %s", target),
		}
		return preview, nil
	}

	var rendered strings.Builder
	if err = format.Node(&rendered, fset, parsed); err != nil {
		return nil, fmt.Errorf("format error: %w", err)
	}
	if err = os.WriteFile(target, []byte(rendered.String()), 0644); err != nil {
		return nil, fmt.Errorf("write error: %w", err)
	}

	done := &plugins.FixResult{
		Success: true,
		Message: fmt.Sprintf("Formatted %s", target),
	}
	return done, nil
}
+363
View File
@@ -0,0 +1,363 @@
package goplugin
import (
"context"
"fmt"
"go/ast"
"go/parser"
"go/token"
"go/types"
"os"
"path/filepath"
"strings"
"github.com/yourorg/devour/internal/quality"
"github.com/yourorg/devour/internal/quality/plugins"
"github.com/yourorg/devour/internal/quality/plugins/go/analyzers"
"github.com/yourorg/devour/internal/quality/plugins/go/fixers"
"golang.org/x/tools/go/packages"
)
// GoPlugin is the stateless language plugin for Go sources.
type GoPlugin struct{}

// New returns a ready-to-use GoPlugin.
func New() *GoPlugin {
	return new(GoPlugin)
}

// Name returns the plugin identifier, "go".
func (p *GoPlugin) Name() string {
	const language = "go"
	return language
}

// Extensions returns the file extensions handled by this plugin.
func (p *GoPlugin) Extensions() []string {
	exts := []string{".go"}
	return exts
}

// MarkerFiles returns the file names whose presence indicates a Go project.
func (p *GoPlugin) MarkerFiles() []string {
	markers := []string{"go.mod", "go.sum"}
	return markers
}

// DefaultSrcDir returns the default source directory, ".".
func (p *GoPlugin) DefaultSrcDir() string {
	const srcDir = "."
	return srcDir
}
// CreateDetectors builds one instance of every Go detector, each wired to
// finder, and returns them in a fixed order.
func (p *GoPlugin) CreateDetectors(finder quality.FileFinder) []quality.Detector {
	detectors := make([]quality.Detector, 0, 16)
	detectors = append(detectors,
		analyzers.NewDeadCodeDetector(finder),
		analyzers.NewEnhancedDeadCodeDetector(finder),
		analyzers.NewUnusedImportDetector(finder),
		analyzers.NewCycleDetector(finder),
		analyzers.NewSecurityDetector(finder),
		analyzers.NewComplexityASTDetector(finder),
		analyzers.NewLargeFileDetector(finder),
		analyzers.NewGodStructDetector(finder),
	)
	detectors = append(detectors,
		analyzers.NewGodFunctionDetector(finder),
		analyzers.NewDebugLogDetector(finder),
		analyzers.NewSingleUseDetector(finder),
		analyzers.NewCouplingDetector(finder),
		analyzers.NewTestCoverageDetector(finder),
		analyzers.NewUntestedFuncDetector(finder),
		analyzers.NewOrphanedFileDetector(finder),
		analyzers.NewDeprecatedUsageDetector(finder),
	)
	return detectors
}
// CreateFixers builds one instance of every Go auto-fixer and returns them in
// a fixed order.
func (p *GoPlugin) CreateFixers() []plugins.Fixer {
	all := make([]plugins.Fixer, 0, 6)
	all = append(all,
		fixers.NewUnusedImportFixer(),
		fixers.NewFormattingFixer(),
		fixers.NewDeadCodeFixer(),
		fixers.NewComplexityHintFixer(),
		fixers.NewIoutilFixer(),
		fixers.NewDocCommentFixer(),
	)
	return all
}
// AnalyzeFile parses the Go file at path and summarises its package name, line
// count, imports, functions, types, package-level variables/constants and
// comments. A file that does not parse yields a wrapped "parse error".
// ctx and config are not used.
func (p *GoPlugin) AnalyzeFile(ctx context.Context, path string, config *quality.Config) (*plugins.FileAnalysis, error) {
	fset := token.NewFileSet()
	file, err := parser.ParseFile(fset, path, nil, parser.ParseComments|parser.AllErrors)
	if err != nil {
		return nil, fmt.Errorf("parse error: %w", err)
	}

	return &plugins.FileAnalysis{
		Path:      path,
		Package:   file.Name.Name,
		LOC:       countLOC(path),
		Imports:   p.extractImports(file, fset),
		Functions: p.extractFunctions(file, path, fset),
		Types:     p.extractTypes(file, path, fset),
		Variables: p.extractVariables(file, path, fset),
		Comments:  p.extractComments(file, path, fset),
	}, nil
}
// BuildDependencyGraph loads every package matching "./..." below rootPath and
// returns the import graph between them, including any import cycles found by
// detectCycles.
//
// Each loaded package becomes a node; each of its imports becomes an entry in
// the node's Imports list and an EdgeTypeImport edge. Imported packages that
// were not matched by the pattern appear only as edge targets. ctx is handed to
// the package loader so that cancelling it can abort the load.
func (p *GoPlugin) BuildDependencyGraph(ctx context.Context, rootPath string) (*plugins.DependencyGraph, error) {
	cfg := &packages.Config{
		Context: ctx,
		Mode:    packages.NeedName | packages.NeedImports | packages.NeedFiles,
		Dir:     rootPath,
	}

	pkgs, err := packages.Load(cfg, "./...")
	if err != nil {
		return nil, fmt.Errorf("failed to load packages: %w", err)
	}

	graph := &plugins.DependencyGraph{
		Packages: make(map[string]*plugins.PackageNode),
		Edges:    []plugins.DependencyEdge{},
	}

	for _, pkg := range pkgs {
		node := &plugins.PackageNode{
			Name:    pkg.Name,
			Path:    pkg.PkgPath,
			Files:   pkg.GoFiles,
			IsLocal: true,
		}

		for importPath, imp := range pkg.Imports {
			// NeedDeps is not requested, and the packages documentation says
			// imports may then be placeholders with only the ID set. Fall back
			// to the import path (the map key) so edges never point at "".
			target := imp.PkgPath
			if target == "" {
				target = importPath
			}
			node.Imports = append(node.Imports, target)
			graph.Edges = append(graph.Edges, plugins.DependencyEdge{
				From: pkg.PkgPath,
				To:   target,
				Type: plugins.EdgeTypeImport,
			})
		}

		graph.Packages[pkg.PkgPath] = node
	}

	graph.Cycles = p.detectCycles(graph)
	return graph, nil
}
// extractImports lists the imports of node with their unquoted path, source
// line and, when the import is renamed, its alias. The result is nil when the
// file has no imports.
func (p *GoPlugin) extractImports(node *ast.File, fset *token.FileSet) []plugins.ImportInfo {
	var found []plugins.ImportInfo
	for _, spec := range node.Imports {
		alias := ""
		if spec.Name != nil {
			alias = spec.Name.Name
		}
		found = append(found, plugins.ImportInfo{
			Path:  strings.Trim(spec.Path.Value, `"`),
			Alias: alias,
			Line:  fset.Position(spec.Pos()).Line,
		})
	}
	return found
}
// extractFunctions describes every top-level function and method declared in
// node: its name, start and end line, line count, the names of its named
// parameters and a source-like rendering of its result list.
//
// ReturnAnnotation is empty for functions without results, a bare type for a
// single unnamed result (e.g. "error") and a parenthesised list otherwise
// (e.g. "(int, error)" or "(n int, err error)").
func (p *GoPlugin) extractFunctions(node *ast.File, path string, fset *token.FileSet) []quality.FunctionInfo {
	var functions []quality.FunctionInfo

	for _, decl := range node.Decls {
		fn, ok := decl.(*ast.FuncDecl)
		if !ok {
			continue
		}

		info := quality.FunctionInfo{
			Name:    fn.Name.Name,
			File:    path,
			Line:    fset.Position(fn.Pos()).Line,
			EndLine: fset.Position(fn.End()).Line,
		}
		info.LOC = info.EndLine - info.Line + 1

		var params []string
		if fn.Type.Params != nil {
			for _, field := range fn.Type.Params.List {
				for _, name := range field.Names {
					params = append(params, name.Name)
				}
			}
		}
		info.Params = params

		// Formatting the *ast.FieldList with %v only dumps struct internals
		// (positions and pointers); render each result type as source instead.
		if results := fn.Type.Results; results != nil && len(results.List) > 0 {
			parts := make([]string, 0, len(results.List))
			named := false
			for _, field := range results.List {
				typ := types.ExprString(field.Type)
				if len(field.Names) == 0 {
					parts = append(parts, typ)
					continue
				}
				named = true
				names := make([]string, len(field.Names))
				for i, name := range field.Names {
					names[i] = name.Name
				}
				parts = append(parts, strings.Join(names, ", ")+" "+typ)
			}
			annotation := strings.Join(parts, ", ")
			if named || len(parts) > 1 {
				annotation = "(" + annotation + ")"
			}
			info.ReturnAnnotation = annotation
		}

		functions = append(functions, info)
	}

	return functions
}
// extractTypes describes every type declared at the top level of node. The
// Underlying field is "struct" or "interface" for those kinds and otherwise the
// Go type name of the AST node that defines the type (as printed by %T).
func (p *GoPlugin) extractTypes(node *ast.File, path string, fset *token.FileSet) []plugins.TypeInfo {
	var found []plugins.TypeInfo

	for _, decl := range node.Decls {
		gen, isGen := decl.(*ast.GenDecl)
		if !isGen || gen.Tok != token.TYPE {
			continue
		}

		for _, spec := range gen.Specs {
			ts, isType := spec.(*ast.TypeSpec)
			if !isType {
				continue
			}

			var underlying string
			if _, isStruct := ts.Type.(*ast.StructType); isStruct {
				underlying = "struct"
			} else if _, isIface := ts.Type.(*ast.InterfaceType); isIface {
				underlying = "interface"
			} else {
				underlying = fmt.Sprintf("%T", ts.Type)
			}

			found = append(found, plugins.TypeInfo{
				Name:       ts.Name.Name,
				Underlying: underlying,
				File:       path,
				Line:       fset.Position(ts.Pos()).Line,
				IsExported: ast.IsExported(ts.Name.Name),
			})
		}
	}

	return found
}
// extractVariables describes every name declared by a top-level var or const
// declaration in node. Type holds the declared type rendered as source text
// (e.g. "map[string]int"); it is empty when the declaration has no explicit
// type.
func (p *GoPlugin) extractVariables(node *ast.File, path string, fset *token.FileSet) []plugins.VariableInfo {
	var variables []plugins.VariableInfo

	for _, decl := range node.Decls {
		gen, ok := decl.(*ast.GenDecl)
		if !ok || (gen.Tok != token.VAR && gen.Tok != token.CONST) {
			continue
		}

		for _, spec := range gen.Specs {
			valueSpec, ok := spec.(*ast.ValueSpec)
			if !ok {
				continue
			}

			// Rendered once per spec. Formatting the expression with %v only
			// yields the name for plain identifiers and a struct dump for
			// anything else (pointers, slices, maps, qualified names, ...).
			declaredType := ""
			if valueSpec.Type != nil {
				declaredType = types.ExprString(valueSpec.Type)
			}

			for _, name := range valueSpec.Names {
				variables = append(variables, plugins.VariableInfo{
					Name:       name.Name,
					Type:       declaredType,
					File:       path,
					Line:       fset.Position(name.Pos()).Line,
					IsExported: ast.IsExported(name.Name),
				})
			}
		}
	}

	return variables
}
// extractComments flattens every comment group of node into one entry per
// comment, recording its raw text and source line. IsDoc is set for comments
// whose text starts with "//".
func (p *GoPlugin) extractComments(node *ast.File, path string, fset *token.FileSet) []plugins.CommentInfo {
	var found []plugins.CommentInfo
	for _, group := range node.Comments {
		for _, c := range group.List {
			found = append(found, plugins.CommentInfo{
				Text:  c.Text,
				File:  path,
				Line:  fset.Position(c.Pos()).Line,
				IsDoc: strings.HasPrefix(c.Text, "//"),
			})
		}
	}
	return found
}
// detectCycles returns the import cycles among the packages of graph.
//
// It runs a depth-first search over graph.Packages following each node's
// Imports. Whenever an import leads back to a package that is still on the
// current search path, the path segment from that package to the current one
// is reported as a cycle. Imports that have no node in the graph have no known
// outgoing edges and can therefore never be part of a cycle.
func (p *GoPlugin) detectCycles(graph *plugins.DependencyGraph) [][]string {
	if graph == nil {
		return nil
	}

	var cycles [][]string
	visited := make(map[string]bool)
	onPath := make(map[string]bool)
	var path []string

	var dfs func(pkg string)
	dfs = func(pkg string) {
		visited[pkg] = true

		node, exists := graph.Packages[pkg]
		if !exists {
			// Return before touching path/onPath: pushing the package and then
			// bailing out would leave it on the search path forever and make
			// later imports of it look like cycles.
			return
		}

		onPath[pkg] = true
		path = append(path, pkg)

		for _, imp := range node.Imports {
			switch {
			case !visited[imp]:
				dfs(imp)
			case onPath[imp]:
				for i, member := range path {
					if member == imp {
						cycle := make([]string, len(path)-i)
						copy(cycle, path[i:])
						cycles = append(cycles, cycle)
						break
					}
				}
			}
		}

		path = path[:len(path)-1]
		onPath[pkg] = false
	}

	for pkg := range graph.Packages {
		if !visited[pkg] {
			dfs(pkg)
		}
	}

	return cycles
}
// LoadTypesInfo loads the package for the Go file at path and returns the type
// information and file set of the first package the loader reports.
//
// The load runs in the file's directory with the file's base name as the
// pattern. ctx is handed to the loader so that cancelling it can abort the
// load. An error is returned when loading fails or no package is found.
func (p *GoPlugin) LoadTypesInfo(ctx context.Context, path string) (*types.Info, *token.FileSet, error) {
	cfg := &packages.Config{
		Context: ctx,
		Mode:    packages.NeedName | packages.NeedTypes | packages.NeedTypesInfo,
		Dir:     filepath.Dir(path),
	}

	pkgs, err := packages.Load(cfg, filepath.Base(path))
	if err != nil {
		return nil, nil, err
	}

	if len(pkgs) == 0 {
		return nil, nil, fmt.Errorf("no packages found")
	}

	return pkgs[0].TypesInfo, pkgs[0].Fset, nil
}
// countLOC returns the number of lines in the file at path.
//
// A final line without a trailing newline still counts as a line, while the
// newline that terminates the last line does not start another one. Empty and
// unreadable files yield 0.
func countLOC(path string) int {
	data, err := os.ReadFile(path)
	if err != nil || len(data) == 0 {
		return 0
	}

	lines := strings.Count(string(data), "\n")
	if data[len(data)-1] != '\n' {
		// The last line is not newline-terminated but still is a line.
		lines++
	}
	return lines
}
// init registers the Go plugin with the global plugin registry when the
// package is loaded.
func init() {
	// The registration error is deliberately discarded, as it always has been
	// here; there is nothing useful to do with it at package-init time.
	_ = plugins.Register(New())
}
+163
View File
@@ -0,0 +1,163 @@
package plugins
import (
"context"
"go/ast"
"go/token"
"go/types"
"github.com/yourorg/devour/internal/quality"
)
// LanguagePlugin defines the interface for language-specific analysis plugins.
// Implementations are stored in the plugin registry under the value returned
// by Name.
type LanguagePlugin interface {
// Name returns the plugin name (e.g., "go", "typescript"). It is the key
// under which the plugin is registered.
Name() string
// Extensions returns the file extensions this plugin handles (e.g., ".go").
Extensions() []string
// MarkerFiles returns the names of files that indicate this language
// (e.g., "go.mod").
MarkerFiles() []string
// DefaultSrcDir returns the default source directory.
DefaultSrcDir() string
// CreateDetectors creates the language-specific detectors, wired to finder.
CreateDetectors(finder quality.FileFinder) []quality.Detector
// CreateFixers creates the language-specific auto-fixers.
CreateFixers() []Fixer
// AnalyzeFile performs AST analysis on the single file at path.
AnalyzeFile(ctx context.Context, path string, config *quality.Config) (*FileAnalysis, error)
// BuildDependencyGraph builds the import dependency graph rooted at path.
BuildDependencyGraph(ctx context.Context, path string) (*DependencyGraph, error)
}
// FileAnalysis represents the result of analyzing a single file.
type FileAnalysis struct {
Path string // path of the analyzed file
Package string // name of the package the file declares
Imports []ImportInfo // imports of the file
Functions []quality.FunctionInfo // functions declared in the file
Classes []quality.ClassInfo // NOTE(review): presumably for class-based languages — confirm
Variables []VariableInfo // variables declared in the file
Types []TypeInfo // types declared in the file
Comments []CommentInfo // comments found in the file
LOC int // line count of the file
Complexity int // NOTE(review): complexity metric; definition not visible here — confirm
Issues []quality.Finding // findings attached to the file
}
// ImportInfo represents a single import of a source file.
type ImportInfo struct {
Path string // imported path
Alias string // local name given to the import; empty when not renamed
Line int // source line of the import
Used bool // whether the import is referenced — TODO confirm which component sets this
Position token.Position // full source position of the import
}
// VariableInfo represents a single declared variable.
type VariableInfo struct {
Name string // declared name
Type string // textual form of the declared type, if any
File string // file containing the declaration
Line int // source line of the declaration
IsExported bool // whether the name is exported
IsUsed bool // whether the variable is referenced — TODO confirm which component sets this
}
// TypeInfo represents a single declared type.
type TypeInfo struct {
Name string // declared name
Underlying string // textual description of the underlying type (e.g. "struct")
Methods []string // method names — TODO confirm which component fills this
File string // file containing the declaration
Line int // source line of the declaration
IsExported bool // whether the name is exported
}
// CommentInfo represents a single source comment.
type CommentInfo struct {
Text string // comment text
File string // file containing the comment
Line int // source line of the comment
IsDoc bool // whether the comment is considered documentation
Attached string // What it's attached to (function name, type name, etc.)
}
// DependencyGraph represents the import dependency graph of a project.
type DependencyGraph struct {
Packages map[string]*PackageNode // nodes of the graph, keyed by package path
Edges []DependencyEdge // directed dependency edges
Cycles [][]string // detected cycles, each a list of package paths
}
// PackageNode represents a node (one package) in the dependency graph.
type PackageNode struct {
Name string // package name
Path string // package path; used as the node's key in the graph
Imports []string // paths of the packages this package imports
ImportedBy []string // paths of the packages importing this one — TODO confirm which component fills this
Files []string // source files of the package
IsLocal bool // NOTE(review): presumably true for packages of the analyzed project — confirm
}
// DependencyEdge represents a directed edge in the dependency graph.
type DependencyEdge struct {
From string // path of the depending package
To string // path of the package depended upon
Type EdgeType // kind of dependency
}
// EdgeType represents the type of dependency edge.
type EdgeType string
// Known edge types.
const (
EdgeTypeImport EdgeType = "import" // dependency created by an import
EdgeTypeEmbed EdgeType = "embed" // NOTE(review): presumably type embedding — confirm
EdgeTypeInternal EdgeType = "internal" // NOTE(review): exact meaning not visible here — confirm
)
// Fixer defines the interface for auto-fixers, components that attempt to
// resolve a finding automatically.
type Fixer interface {
// Name returns the fixer name
Name() string
// Description returns a human-readable description
Description() string
// CanFix checks if this fixer can fix the given finding
CanFix(finding quality.Finding) bool
// Fix attempts to resolve finding and reports the outcome. When dryRun is
// set the fixer is expected to describe the change without writing it.
Fix(ctx context.Context, finding quality.Finding, dryRun bool) (*FixResult, error)
}
// FixResult represents the result of a fix operation.
type FixResult struct {
Success bool // whether the fix was (or, in a dry run, would be) applied
Patches []Patch // individual file patches describing the change, if provided
Message string // human-readable summary of the outcome
Warnings []string // additional notes for the user
}
// Patch represents a single file patch: a replacement of OldText by NewText.
type Patch struct {
File string // file the patch applies to
OldText string // text being replaced
NewText string // replacement text
Start int // start of the replaced range — TODO confirm unit (line or byte offset)
End int // end of the replaced range — TODO confirm unit and inclusiveness
}
// ASTInfo bundles the parsed and type-checked representation of a Go file for
// analysis.
type ASTInfo struct {
File *ast.File // parsed syntax tree
Fset *token.FileSet // file set the positions in File refer to
Types *types.Info // type-checker results
Package *types.Package // type-checked package
}
+117
View File
@@ -0,0 +1,117 @@
package plugins
import (
	"fmt"
	"os"
	"sort"
	"sync"
)
// Registry holds the registered language plugins, keyed by plugin name. All
// access to the map is guarded by mu.
type Registry struct {
	mu      sync.RWMutex
	plugins map[string]LanguagePlugin
}

// globalRegistry is the process-wide registry used by the package-level
// functions.
var globalRegistry = &Registry{plugins: map[string]LanguagePlugin{}}
// Register adds plugin to the global registry under plugin.Name(). It returns
// an error, and leaves the registry unchanged, when a plugin with the same
// name is already registered.
func Register(plugin LanguagePlugin) error {
	reg := globalRegistry
	reg.mu.Lock()
	defer reg.mu.Unlock()

	name := plugin.Name()
	_, duplicate := reg.plugins[name]
	if duplicate {
		return fmt.Errorf("plugin %s already registered", name)
	}

	reg.plugins[name] = plugin
	return nil
}
// Get looks up the plugin registered under name. The boolean result reports
// whether such a plugin exists.
func Get(name string) (LanguagePlugin, bool) {
	reg := globalRegistry
	reg.mu.RLock()
	defer reg.mu.RUnlock()

	found, registered := reg.plugins[name]
	return found, registered
}
// All returns every registered plugin, ordered by plugin name so that callers
// see a stable order.
func All() []LanguagePlugin {
	reg := globalRegistry
	reg.mu.RLock()
	defer reg.mu.RUnlock()

	list := make([]LanguagePlugin, 0, len(reg.plugins))
	for _, registered := range reg.plugins {
		list = append(list, registered)
	}

	byName := func(a, b int) bool {
		return list[a].Name() < list[b].Name()
	}
	sort.Slice(list, byName)

	return list
}
// Names returns the names of all registered plugins in ascending order.
func Names() []string {
	reg := globalRegistry
	reg.mu.RLock()
	defer reg.mu.RUnlock()

	sorted := make([]string, 0, len(reg.plugins))
	for key := range reg.plugins {
		sorted = append(sorted, key)
	}
	sort.Strings(sorted)

	return sorted
}
// DetectLanguage attempts to detect the language of the project at path.
//
// Plugins are consulted in ascending name order; the first plugin for which
// one of its marker files exists in path wins. When no marker matches, the
// alphabetically first registered plugin is returned as the default, and ""
// when no plugin is registered at all. Using a sorted order makes the result
// independent of map iteration order.
func DetectLanguage(path string) string {
	globalRegistry.mu.RLock()
	defer globalRegistry.mu.RUnlock()

	names := make([]string, 0, len(globalRegistry.plugins))
	for name := range globalRegistry.plugins {
		names = append(names, name)
	}
	sort.Strings(names)

	// Check marker files for each plugin
	for _, name := range names {
		plugin := globalRegistry.plugins[name]
		for _, marker := range plugin.MarkerFiles() {
			if fileExists(path + "/" + marker) {
				return plugin.Name()
			}
		}
	}

	// Default to the first registered plugin (by name)
	if len(names) > 0 {
		return names[0]
	}

	return ""
}
// GetForExtension returns a registered plugin whose Extensions list contains
// ext (compared exactly, e.g. ".go"), or nil when there is none.
func GetForExtension(ext string) LanguagePlugin {
	reg := globalRegistry
	reg.mu.RLock()
	defer reg.mu.RUnlock()

	handles := func(candidate LanguagePlugin) bool {
		for _, known := range candidate.Extensions() {
			if known == ext {
				return true
			}
		}
		return false
	}

	for _, candidate := range reg.plugins {
		if handles(candidate) {
			return candidate
		}
	}
	return nil
}
// fileExists reports whether path names an existing entry that is not a
// directory. Any error from the stat call (missing file, permission problem,
// ...) is treated as "does not exist".
func fileExists(path string) bool {
	info, err := os.Stat(path)
	if err != nil {
		return false
	}
	return !info.IsDir()
}
+315
View File
@@ -0,0 +1,315 @@
package review
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"time"
"github.com/yourorg/devour/internal/quality"
)
// ReviewPacket is the JSON document handed to a reviewer: the scorecard, the
// open findings, summary context and a list of questions to answer.
type ReviewPacket struct {
Generated time.Time `json:"generated"` // creation time of the packet
ProjectPath string `json:"project_path"` // path the packet was generated for
Language string `json:"language"` // language of the analyzed project
Scorecard *quality.Scorecard `json:"scorecard"` // scorecard passed in by the caller
Findings []FindingReview `json:"findings"` // findings selected for review
Context ReviewContext `json:"context"` // aggregate information about the findings
Questions []ReviewQuestion `json:"questions"` // questions for the reviewer
}
// FindingReview is the review-packet view of a single finding: the finding's
// own fields plus review-specific hints.
type FindingReview struct {
ID string `json:"id"`
Type string `json:"type"`
Title string `json:"title"`
Description string `json:"description"`
File string `json:"file"`
Line int `json:"line"`
Severity quality.Severity `json:"severity"`
Score int `json:"score"`
Status quality.Status `json:"status"`
NeedsReview bool `json:"needs_review"` // whether the finding is flagged for human review
Context string `json:"context"` // guidance text for the reviewer
Metadata map[string]string `json:"metadata"`
}
// ReviewContext carries aggregate information that accompanies the findings of
// a review packet.
type ReviewContext struct {
TotalFiles int `json:"total_files"` // number of files — TODO confirm which component fills this
TotalLOC int `json:"total_loc"` // number of lines of code — TODO confirm which component fills this
FindingsByDim map[string]int `json:"findings_by_dimension"` // finding count per quality dimension
TopIssues []string `json:"top_issues"` // short "type: title" descriptions of selected findings
Trends map[string]string `json:"trends"` // NOTE(review): meaning of keys/values not visible here — confirm
}
// ReviewQuestion is a question posed to the reviewer, optionally with a fixed
// set of answer options.
type ReviewQuestion struct {
ID string `json:"id"` // identifier of the question
Category string `json:"category"` // topic the question belongs to
Question string `json:"question"` // question text
Options []string `json:"options,omitempty"` // suggested answers; omitted from JSON when empty
}
// PacketGenerator builds review packets and stores them below dataDir.
type PacketGenerator struct {
	dataDir string
}

// NewPacketGenerator returns a PacketGenerator that works in dataDir.
func NewPacketGenerator(dataDir string) *PacketGenerator {
	generator := new(PacketGenerator)
	generator.dataDir = dataDir
	return generator
}
// Generate assembles a review packet from findings and scorecard for the given
// language. The packet is stamped with the current time and the generator's
// data directory as project path. The returned error is always nil.
func (g *PacketGenerator) Generate(findings []quality.Finding, scorecard *quality.Scorecard, lang string) (*ReviewPacket, error) {
	packet := new(ReviewPacket)
	packet.Generated = time.Now()
	packet.ProjectPath = g.dataDir
	packet.Language = lang
	packet.Scorecard = scorecard
	packet.Findings = g.convertFindings(findings)
	packet.Context = g.buildContext(findings)
	packet.Questions = g.generateQuestions(findings)
	return packet, nil
}
// convertFindings turns the open findings into FindingReview entries, keeping
// their order. Findings with any other status are left out. NeedsReview is set
// for severities of quality.SeverityT3 and above, and Context receives the
// guidance text from generateContext. The result is nil when no finding is
// open.
func (g *PacketGenerator) convertFindings(findings []quality.Finding) []FindingReview {
	var converted []FindingReview

	for _, finding := range findings {
		if finding.Status == quality.StatusOpen {
			converted = append(converted, FindingReview{
				ID:          finding.ID,
				Type:        finding.Type,
				Title:       finding.Title,
				Description: finding.Description,
				File:        finding.File,
				Line:        finding.Line,
				Severity:    finding.Severity,
				Score:       finding.Score,
				Status:      finding.Status,
				NeedsReview: finding.Severity >= quality.SeverityT3,
				Context:     g.generateContext(finding),
				Metadata:    finding.Metadata,
			})
		}
	}

	return converted
}
// generateContext returns the reviewer guidance text for a finding, chosen by
// the finding's type. Types without a dedicated text get a generic prompt.
func (g *PacketGenerator) generateContext(f quality.Finding) string {
	const complexityHint = "This function may be difficult to maintain. Consider if it can be simplified or broken down."

	hints := map[string]string{
		"complexity":     complexityHint,
		"complexity_ast": complexityHint,
		"duplication":    "Similar code exists elsewhere. Consider extracting common functionality.",
		"dead_code":      "This code appears unused. Verify before removing - it may be called via reflection or external tools.",
		"security":       "Potential security concern. Review carefully and consider security implications.",
		"import_cycle":   "Circular dependency detected. This can cause initialization issues and makes code harder to understand.",
	}

	if hint, known := hints[f.Type]; known {
		return hint
	}
	return "Review this finding and decide if it needs addressing."
}
// buildContext summarises the open findings: how many fall into each quality
// dimension, and "type: title" descriptions of the first five open findings in
// input order. Trends is returned as an empty map; TotalFiles and TotalLOC are
// left at zero.
func (g *PacketGenerator) buildContext(findings []quality.Finding) ReviewContext {
	const maxTopIssues = 5

	perDimension := make(map[string]int)
	var top []string

	for _, finding := range findings {
		if finding.Status != quality.StatusOpen {
			continue
		}
		perDimension[g.classifyDimension(finding)]++
		if len(top) < maxTopIssues {
			top = append(top, fmt.Sprintf("%s: %s", finding.Type, finding.Title))
		}
	}

	return ReviewContext{
		FindingsByDim: perDimension,
		TopIssues:     top,
		Trends:        make(map[string]string),
	}
}
// classifyDimension maps a finding's type to the name of the quality dimension
// it is counted under. Unknown types are classified as "Other".
func (g *PacketGenerator) classifyDimension(f quality.Finding) string {
	dimensions := map[string]string{
		"complexity":     "Code Quality",
		"complexity_ast": "Code Quality",
		"duplication":    "Duplication",
		"dead_code":      "File Health",
		"unused_import":  "File Health",
		"unused":         "File Health",
		"security":       "Security",
		"naming":         "Naming Quality",
		"import_cycle":   "Architecture",
	}

	if dimension, known := dimensions[f.Type]; known {
		return dimension
	}
	return "Other"
}
// generateQuestions builds the reviewer questionnaire. A strategy question is
// added for duplication, complexity and dead code, in that order, when at
// least one open finding of that kind exists; the prioritisation question is
// always appended last.
func (g *PacketGenerator) generateQuestions(findings []quality.Finding) []ReviewQuestion {
	// Record which finding types occur among the open findings.
	openTypes := make(map[string]bool)
	for _, finding := range findings {
		if finding.Status == quality.StatusOpen {
			openTypes[finding.Type] = true
		}
	}

	var questions []ReviewQuestion
	ask := func(id, category, question string, options ...string) {
		questions = append(questions, ReviewQuestion{
			ID:       id,
			Category: category,
			Question: question,
			Options:  options,
		})
	}

	if openTypes["duplication"] {
		ask("dupe_strategy", "duplication",
			"How should duplicated code be consolidated?",
			"Extract to shared utility",
			"Keep separate (different use cases)",
			"Refactor to common interface",
		)
	}

	if openTypes["complexity"] || openTypes["complexity_ast"] {
		ask("complexity_strategy", "complexity",
			"What's the best approach for complex functions?",
			"Break into smaller functions",
			"Introduce helper types",
			"Accept current complexity",
		)
	}

	if openTypes["dead_code"] {
		ask("dead_code_strategy", "maintenance",
			"Should unused code be removed?",
			"Remove if truly unused",
			"Keep for future use",
			"Mark as deprecated",
		)
	}

	ask("priority", "planning",
		"Which area should be prioritized for improvement?",
		"Security issues first",
		"Complexity reduction",
		"Dead code cleanup",
		"Architecture improvements",
	)

	return questions
}
// Save writes packet as indented JSON to <dataDir>/review/<filename>, creating
// the review directory first if necessary.
func (g *PacketGenerator) Save(packet *ReviewPacket, filename string) error {
	dir := filepath.Join(g.dataDir, "review")
	if mkErr := os.MkdirAll(dir, 0755); mkErr != nil {
		return fmt.Errorf("failed to create review directory: %w", mkErr)
	}

	encoded, encErr := json.MarshalIndent(packet, "", " ")
	if encErr != nil {
		return fmt.Errorf("failed to marshal packet: %w", encErr)
	}

	target := filepath.Join(dir, filename)
	if writeErr := os.WriteFile(target, encoded, 0644); writeErr != nil {
		return fmt.Errorf("failed to write packet: %w", writeErr)
	}
	return nil
}
// Load reads and decodes the review packet stored at
// <dataDir>/review/<filename>.
func (g *PacketGenerator) Load(filename string) (*ReviewPacket, error) {
	source := filepath.Join(g.dataDir, "review", filename)

	raw, readErr := os.ReadFile(source)
	if readErr != nil {
		return nil, fmt.Errorf("failed to read packet: %w", readErr)
	}

	loaded := new(ReviewPacket)
	if decodeErr := json.Unmarshal(raw, loaded); decodeErr != nil {
		return nil, fmt.Errorf("failed to parse packet: %w", decodeErr)
	}
	return loaded, nil
}
// ImportReview records reviewer responses on the findings stored in
// <dataDir>/quality/status.json.
//
// responses maps finding IDs to the reviewer's answer. Every stored finding
// with a matching ID gets the metadata entries "review_response" and
// "reviewed_at" (RFC 3339). All other top-level keys of status.json are written
// back unchanged. The review packet named by filename must exist and parse; it
// is loaded only to validate that.
func (g *PacketGenerator) ImportReview(filename string, responses map[string]string) error {
	if _, err := g.Load(filename); err != nil {
		return err
	}

	findingsPath := filepath.Join(g.dataDir, "quality", "status.json")
	data, err := os.ReadFile(findingsPath)
	if err != nil {
		return fmt.Errorf("failed to read findings: %w", err)
	}

	// Decode the document generically so that keys other than "findings"
	// survive the round trip instead of being dropped on write.
	var state map[string]json.RawMessage
	if err := json.Unmarshal(data, &state); err != nil {
		return fmt.Errorf("failed to parse findings: %w", err)
	}
	if state == nil {
		state = make(map[string]json.RawMessage)
	}

	var findings []quality.Finding
	if raw, ok := state["findings"]; ok {
		if err := json.Unmarshal(raw, &findings); err != nil {
			return fmt.Errorf("failed to parse findings: %w", err)
		}
	}

	reviewedAt := time.Now().Format(time.RFC3339)
	// Index into the slice: ranging by value would update copies, and a
	// Metadata map created on such a copy would be lost.
	for i := range findings {
		finding := &findings[i]
		response, ok := responses[finding.ID]
		if !ok {
			continue
		}
		if finding.Metadata == nil {
			finding.Metadata = make(map[string]string)
		}
		finding.Metadata["review_response"] = response
		finding.Metadata["reviewed_at"] = reviewedAt
	}

	encoded, err := json.Marshal(findings)
	if err != nil {
		return fmt.Errorf("failed to marshal updated findings: %w", err)
	}
	state["findings"] = encoded

	updatedData, err := json.MarshalIndent(state, "", " ")
	if err != nil {
		return fmt.Errorf("failed to marshal updated findings: %w", err)
	}

	if err := os.WriteFile(findingsPath, updatedData, 0644); err != nil {
		return fmt.Errorf("failed to write updated findings: %w", err)
	}

	return nil
}
+233
View File
@@ -0,0 +1,233 @@
package quality
import (
"context"
"fmt"
"log"
"os"
"path/filepath"
"strings"
"time"
)
// Scanner orchestrates the code quality scanning process: it owns the
// registered detectors, an optional file finder and the scan configuration.
type Scanner struct {
	detectors map[string]Detector
	finder    FileFinder
	config    *Config
}

// NewScanner creates a quality scanner for config with no detectors registered
// and no file finder set.
func NewScanner(config *Config) *Scanner {
	scanner := new(Scanner)
	scanner.detectors = map[string]Detector{}
	scanner.config = config
	return scanner
}

// RegisterDetector adds detector to the scanner under detector.Name(). A
// detector registered earlier under the same name is replaced.
func (s *Scanner) RegisterDetector(detector Detector) {
	key := detector.Name()
	s.detectors[key] = detector
}

// SetFileFinder sets the file finder used by the scanner.
func (s *Scanner) SetFileFinder(finder FileFinder) {
	s.finder = finder
}
// Scan runs every registered detector against the configured path and returns
// the combined result.
//
// The language is taken from the configuration or auto-detected when empty.
// Detectors that implement LanguageDetector are skipped when they do not
// support that language. A failing detector is logged and skipped; it does not
// abort the scan. Findings pass through filterFindings before they are
// collected and scored. An error is returned only when the source files cannot
// be listed.
func (s *Scanner) Scan(ctx context.Context) (*ScanResult, error) {
	startedAt := time.Now()
	log.Printf("Starting quality scan for path: %s", s.config.Path)

	// Determine the language, detecting it when the config leaves it open.
	lang := s.config.Language
	if lang == "" {
		lang = s.detectLanguage(s.config.Path)
		log.Printf("Auto-detected language: %s", lang)
	}

	sources, err := s.getSourceFiles(s.config.Path, lang)
	if err != nil {
		return nil, fmt.Errorf("failed to get source files: %w", err)
	}
	fileCount := len(sources)
	log.Printf("Found %d source files to analyze", fileCount)

	collected := make([]Finding, 0)
	for name, detector := range s.detectors {
		log.Printf("Running detector: %s", name)

		// Language-specific detectors only run for the languages they support.
		langDetector, languageSpecific := detector.(LanguageDetector)
		if languageSpecific && !contains(langDetector.SupportedLanguages(), lang) {
			log.Printf("Skipping detector %s for language %s", name, lang)
			continue
		}

		detected, detectErr := detector.Detect(ctx, s.config.Path, s.config)
		if detectErr != nil {
			log.Printf("Detector %s failed: %v", name, detectErr)
			continue
		}

		// Drop findings that match the exclude patterns.
		kept := s.filterFindings(detected)
		collected = append(collected, kept...)
		log.Printf("Detector %s found %d issues", name, len(kept))
	}

	score, strictScore := s.calculateScores(collected)
	elapsed := time.Since(startedAt)

	result := &ScanResult{
		Findings:     collected,
		Score:        score,
		StrictScore:  strictScore,
		FilesChecked: fileCount,
		Duration:     elapsed.String(),
		Timestamp:    time.Now(),
	}

	log.Printf("Scan completed in %s: %d findings, score: %d (strict: %d)",
		elapsed, len(collected), score, strictScore)

	return result, nil
}
// detectLanguage guesses the project language from well-known marker files
// located directly under path.
//
// Markers are probed in the fixed order listed below and the first one that
// exists wins, so the result is deterministic when a project contains several
// (for example a Go module that also ships a package.json is reported as
// "go"). If no marker exists the language defaults to "go".
func (s *Scanner) detectLanguage(path string) string {
	// A slice rather than a map: Go randomizes map iteration order, which
	// would make the result vary between runs.
	markers := []struct {
		file string
		lang string
	}{
		{"go.mod", "go"},
		{"package.json", "typescript"},
		{"tsconfig.json", "typescript"},
		{"requirements.txt", "python"},
		{"setup.py", "python"},
		{"pyproject.toml", "python"},
		{"pom.xml", "java"},
		{"build.gradle", "java"},
		{"Cargo.toml", "rust"},
		{"composer.json", "php"},
	}

	for _, m := range markers {
		// Existence must be checked with Stat: filepath.Abs and filepath.Glob
		// both return a nil error for paths that do not exist.
		if _, err := os.Stat(filepath.Join(path, m.file)); err == nil {
			return m.lang
		}
	}

	// Default to Go if no markers found.
	return "go"
}
// getSourceFiles returns the source files under path for the given language.
//
// When a FileFinder has been installed the call is delegated to it unchanged.
// Otherwise the tree is walked and files are selected by extension; unknown
// languages fall back to the Go extension list. Hidden directories,
// "node_modules" and "vendor" are pruned, and files matching the configured
// exclude patterns are dropped.
//
// The scan root itself is never pruned, so a root of "." or ".." (whose base
// name starts with a dot) is still walked.
func (s *Scanner) getSourceFiles(path, language string) ([]string, error) {
	if s.finder != nil {
		return s.finder.FindFiles(path, language)
	}

	// Fallback to basic file extension matching.
	extensions := map[string][]string{
		"go":         {".go"},
		"typescript": {".ts", ".tsx"},
		"python":     {".py"},
		"java":       {".java"},
		"rust":       {".rs"},
		"javascript": {".js", ".jsx"},
	}
	langExts, ok := extensions[language]
	if !ok {
		langExts = []string{".go"} // default to Go
	}

	var files []string
	err := filepath.Walk(path, func(filePath string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			// filepath.Walk reports the root with the exact string it was
			// given; skipping it would end the walk before it starts.
			if filePath == path {
				return nil
			}
			// Skip hidden directories and common exclude dirs.
			base := filepath.Base(filePath)
			if strings.HasPrefix(base, ".") || base == "node_modules" || base == "vendor" {
				return filepath.SkipDir
			}
			return nil
		}

		// Keep the file if its extension belongs to the language.
		ext := filepath.Ext(filePath)
		for _, langExt := range langExts {
			if ext == langExt {
				if !ShouldExclude(filePath, s.config.Exclude) {
					files = append(files, filePath)
				}
				break
			}
		}
		return nil
	})
	return files, err
}
// filterFindings drops findings whose File matches one of the configured
// exclude patterns. With no patterns configured the input slice is returned
// as-is; otherwise a new slice is built (nil when nothing survives).
func (s *Scanner) filterFindings(findings []Finding) []Finding {
	if len(s.config.Exclude) == 0 {
		return findings
	}

	var kept []Finding
	for i := range findings {
		if ShouldExclude(findings[i].File, s.config.Exclude) {
			continue
		}
		kept = append(kept, findings[i])
	}
	return kept
}
// calculateScores sums Score × Severity over all findings. The first result
// covers every finding; the second ("strict") covers only findings whose
// status is open or wontfix.
func (s *Scanner) calculateScores(findings []Finding) (int, int) {
	var total, strict int
	for i := range findings {
		f := &findings[i]
		weighted := f.Score * int(f.Severity)
		total += weighted

		// Strict score includes open and wontfix findings.
		countsForStrict := f.Status == StatusOpen || f.Status == StatusWontfix
		if countsForStrict {
			strict += weighted
		}
	}
	return total, strict
}
// contains reports whether item occurs in slice. A nil or empty slice
// contains nothing.
func contains(slice []string, item string) bool {
	for i := 0; i < len(slice); i++ {
		if slice[i] == item {
			return true
		}
	}
	return false
}
+331
View File
@@ -0,0 +1,331 @@
package scorecard
import (
"fmt"
"image"
"image/png"
"os"
"path/filepath"
"sort"
"time"
"github.com/yourorg/devour/internal/quality"
)
// Dimension is one row of the scorecard table: a named quality category with
// its score values and the number of findings behind it.
type Dimension struct {
	Name string // display name shown in the table
	Score float64 // health score, rendered with a "%" suffix
	Strict float64 // strict score, rendered in the muted column
	Count int // number of findings counted for this dimension
}
// ScorecardData is the input to Generate: everything the scorecard image
// displays, plus summary counts carried alongside it.
type ScorecardData struct {
	ProjectName string // shown in the pill; "project" is drawn when empty
	Version string // drawn as "v<Version>"; "version unknown" when empty
	OverallScore float64 // headline score in the left panel
	StrictScore float64 // strict score shown under the headline
	Grade string // letter grade derived from OverallScore
	FindingsTotal int // total number of findings
	FindingsOpen int // number of findings with open status
	LastScan time.Time // time of the last scan
	Dimensions []Dimension // rows of the right-hand table
	FindByType map[string]int // finding counts keyed by finding type
	FindByTier map[string]int // finding counts keyed by tier label such as "T1"
}
// Generate renders data as a PNG scorecard and writes it to outputPath,
// creating the parent directory if needed.
//
// The image is 780*Scale pixels wide. Its height grows with the number of
// dimensions (laid out in two columns, with room for at least four rows) and
// never drops below the minimum needed by the left score panel.
//
// An error is returned when the directory or file cannot be created, when PNG
// encoding fails, or when closing the file fails (a failed close can mean the
// data never reached disk).
func Generate(data *ScorecardData, outputPath string) error {
	width := 780 * Scale
	leftPanelWidth := 260 * Scale
	frameInset := 5 * Scale

	// Size the table area from the row count; fewer than four rows still
	// reserves four rows' worth of height.
	rowCount := len(data.Dimensions)
	if rowCount < 4 {
		rowCount = 4
	}
	cols := 2
	rowsPerCol := (rowCount + cols - 1) / cols
	rowH := 20 * Scale
	tableContentH := 14*Scale + 4*Scale + 6*Scale + rowsPerCol*rowH
	contentH := max(tableContentH+28*Scale, 150*Scale)
	height := 12*Scale + contentH

	img := image.NewRGBA(image.Rect(0, 0, width, height))
	dc := NewDrawContext(img, Scale)
	dc.FillBackground(BG)
	dc.DrawDoubleFrame(0, 0, width-1, height-1, FRAME, BORDER, 2*Scale, 1)

	contentTop := frameInset + Scale
	contentBot := height - frameInset - Scale
	contentMidY := (contentTop + contentBot) / 2
	dividerX := leftPanelWidth

	drawLeftPanel(dc, data, frameInset+11*Scale, dividerX-11*Scale, contentTop+4*Scale, contentBot-4*Scale)
	dc.DrawVertRuleWithOrnament(dividerX, contentTop+12*Scale, contentBot-12*Scale, contentMidY, BORDER, ACCENT)
	drawRightPanel(dc, data, dividerX+11*Scale, width-frameInset-11*Scale, contentTop+4*Scale, contentBot-4*Scale)

	dir := filepath.Dir(outputPath)
	if dir != "" {
		if err := os.MkdirAll(dir, 0755); err != nil {
			return fmt.Errorf("failed to create directory: %w", err)
		}
	}

	f, err := os.Create(outputPath)
	if err != nil {
		return fmt.Errorf("failed to create file: %w", err)
	}
	if err := png.Encode(f, img); err != nil {
		f.Close() // best effort; the encode error is the one worth reporting
		return fmt.Errorf("failed to encode PNG: %w", err)
	}
	// Close explicitly instead of deferring so a write-back failure surfaces.
	if err := f.Close(); err != nil {
		return fmt.Errorf("failed to close file: %w", err)
	}
	return nil
}
// drawLeftPanel renders the score panel inside the rectangle
// (lpLeft, lpTop)-(lpRight, lpBot). From top to bottom it draws: the version
// line, the "DEVOUR SCORE" title, an ornamented rule, the overall score, the
// strict score, and the project name inside a pill. Everything is centered
// horizontally on the panel's midpoint.
func drawLeftPanel(dc *DrawContext, data *ScorecardData, lpLeft, lpRight, lpTop, lpBot int) {
	lpCenter := (lpLeft + lpRight) / 2
	panelWidth := lpRight - lpLeft
	panelHeight := lpBot - lpTop
	// Panel background and border. DrawRoundedRect takes width/height while
	// DrawRect takes the two corner coordinates.
	dc.DrawRoundedRect(lpLeft, lpTop, panelWidth, panelHeight, 4*Scale, BGScore)
	dc.DrawRect(lpLeft, lpTop, lpRight, lpBot, BORDER, 1)
	// Version line, with a placeholder when no version was supplied.
	versionText := "version unknown"
	if data.Version != "" {
		versionText = "v" + data.Version
	}
	fontVersion := GetFont()
	versionW, _, versionOffY := dc.TextBounds(versionText, fontVersion)
	// TextBounds reports offsetY as the negated ascent, so subtracting it
	// moves the baseline down by the ascent.
	versionY := lpTop + 12*Scale - versionOffY
	dc.DrawText(versionText, lpCenter-versionW/2, versionY, fontVersion, DIM)
	// Title.
	title := "DEVOUR SCORE"
	fontTitle := GetFont()
	titleW, titleH, _ := dc.TextBounds(title, fontTitle)
	titleY := lpTop + 28*Scale
	dc.DrawText(title, lpCenter-titleW/2, titleY, fontTitle, TEXT)
	// Ornamented rule below the title.
	ruleY := titleY + titleH + 7*Scale
	dc.DrawRuleWithOrnament(ruleY, lpLeft+28*Scale, lpRight-28*Scale, lpCenter, BORDER, ACCENT)
	// Overall score, colored by its grade. Unlike the strict value below it
	// is drawn without a "%" suffix.
	scoreText := FmtScore(data.OverallScore)
	fontBig := GetFont()
	scoreW, scoreH, scoreOffY := dc.TextBounds(scoreText, fontBig)
	scoreY := ruleY + 6*Scale + 7*Scale - scoreOffY
	scoreColor := GetScoreColor(int(data.OverallScore))
	dc.DrawText(scoreText, lpCenter-scoreW/2, scoreY, fontBig, scoreColor)
	// "strict <value>%" pair: label and value are measured separately and the
	// combined width (including the gap) is centered.
	strictLabel := "strict"
	strictValue := FmtScore(data.StrictScore) + "%"
	fontStrictLabel := GetFont()
	fontStrictVal := GetFont()
	labelW, _, labelOffY := dc.TextBounds(strictLabel, fontStrictLabel)
	valueW, _, valueOffY := dc.TextBounds(strictValue, fontStrictVal)
	gap := 5 * Scale
	strictY := scoreY + scoreH + 6*Scale
	strictX := lpCenter - (labelW+gap+valueW)/2
	dc.DrawText(strictLabel, strictX, strictY-labelOffY, fontStrictLabel, DIM)
	strictColor := GetScoreColorMuted(int(data.StrictScore))
	dc.DrawText(strictValue, strictX+labelW+gap, strictY-valueOffY, fontStrictVal, strictColor)
	// Project name pill, with a placeholder name when none was supplied.
	projectName := data.ProjectName
	if projectName == "" {
		projectName = "project"
	}
	fontProject := GetFont()
	projectW, projectH, _ := dc.TextBounds(projectName, fontProject)
	pillPadX := 8 * Scale
	pillPadY := 3 * Scale
	pillHeight := projectH + 2*pillPadY
	pillTop := strictY + projectH + 8*Scale
	pillLeft := lpCenter - projectW/2 - pillPadX
	pillRight := lpCenter + projectW/2 + pillPadX
	dc.DrawRoundedRect(pillLeft, pillTop, pillRight-pillLeft, pillHeight, 3*Scale, BG)
	dc.DrawRect(pillLeft, pillTop, pillRight, pillTop+pillHeight, BORDER, 1)
	// NOTE(review): DrawText positions the font baseline at y, and the other
	// labels here compensate with the TextBounds offset; this one does not, so
	// the name may sit higher than the pill suggests — confirm visually.
	projectY := pillTop + pillPadY
	dc.DrawText(projectName, lpCenter-projectW/2, projectY, fontProject, DIM)
}
// drawRightPanel renders the dimensions table inside the rectangle
// (tableX1, tableTop)-(tableX2, tableBot). Dimensions are split across two
// side-by-side grids: the first ceil(n/2) go in the left grid, the rest in
// the right one. Each row shows the truncated dimension name, its score and
// its strict score, vertically centered as a group within the grid.
func drawRightPanel(dc *DrawContext, data *ScorecardData, tableX1, tableX2, tableTop, tableBot int) {
	fontRow := GetFont()
	fontStrict := GetFont()
	rowCount := len(data.Dimensions)
	cols := 2
	rowsPerCol := (rowCount + cols - 1) / cols
	gridGap := 8 * Scale
	gridWidth := (tableX2 - tableX1 - gridGap) / cols
	rowH := 20 * Scale
	for colIndex := 0; colIndex < cols; colIndex++ {
		// Grid frame for this column; drawn even when the column has no rows.
		gridX1 := tableX1 + colIndex*(gridWidth+gridGap)
		gridX2 := gridX1 + gridWidth
		dc.DrawRoundedRect(gridX1, tableTop, gridWidth, tableBot-tableTop, 4*Scale, BGTable)
		dc.DrawRect(gridX1, tableTop, gridX2, tableBot, BORDER, 1)
		// Fixed-width name / score / strict columns, centered as one block
		// inside the grid.
		nameColWidth := 120 * Scale
		valueColGap := 4 * Scale
		valueColWidth := 34 * Scale
		totalContentWidth := nameColWidth + valueColGap + valueColWidth + valueColGap + valueColWidth
		blockLeft := gridX1 + (gridWidth-totalContentWidth)/2
		nameColX := blockLeft
		healthColX := nameColX + nameColWidth + valueColGap
		strictColX := healthColX + valueColWidth + valueColGap + 4*Scale
		// Rows in this column: the second column is one short when the total
		// is odd, and is further clamped to the rows that actually remain.
		thisColRows := rowsPerCol
		if colIndex == 1 && rowCount%2 != 0 {
			thisColRows = rowsPerCol - 1
		}
		if colIndex*rowsPerCol+thisColRows > rowCount {
			thisColRows = rowCount - colIndex*rowsPerCol
		}
		// Center the block of rows vertically within the grid.
		contentHeight := thisColRows * rowH
		contentTop := (tableTop+tableBot)/2 - contentHeight/2
		_, rowTextH, rowTextOff := dc.TextBounds("Xg", fontRow)
		startIdx := colIndex * rowsPerCol
		for rowIdx := 0; rowIdx < thisColRows; rowIdx++ {
			dimIdx := startIdx + rowIdx
			if dimIdx >= rowCount {
				break
			}
			dim := data.Dimensions[dimIdx]
			bandTop := contentTop + rowIdx*rowH
			// Zebra striping on every second row.
			if rowIdx%2 == 1 {
				dc.FillRect(gridX1+1, bandTop, gridWidth-2, rowH, BGRowAlt)
			}
			textY := bandTop + (rowH-rowTextH)/2 - rowTextOff + Scale
			maxNameWidth := nameColWidth - 2*Scale
			name := dc.TruncateText(dim.Name, maxNameWidth, fontRow)
			dc.DrawText(name, nameColX, textY, fontRow, TEXT)
			// NOTE(review): a score of exactly 0 is displayed as 100,
			// presumably to treat 0 as "unset". buildDimensions clamps scores
			// at 0, so a fully degraded dimension would render as 100% —
			// confirm this is intended.
			score := dim.Score
			if score == 0 {
				score = 100
			}
			scoreText := FmtScore(score) + "%"
			dc.DrawText(scoreText, healthColX, textY, fontRow, GetScoreColor(int(score)))
			// A zero strict value falls back to the (possibly substituted) score.
			strict := dim.Strict
			if strict == 0 {
				strict = score
			}
			strictText := FmtScore(strict) + "%"
			_, strictTextH, strictOff := dc.TextBounds(strictText, fontStrict)
			strictY := bandTop + (rowH-strictTextH)/2 - strictOff
			dc.DrawText(strictText, strictColX, strictY, fontStrict, GetScoreColorMuted(int(strict)))
		}
	}
}
// FromQualityState converts a persisted quality state into the data needed
// to render a scorecard.
//
// Scores and per-type/per-tier counts are taken from state.Scorecard when it
// is present; otherwise scores stay zero and the count maps stay empty. Tier
// keys are rendered as "T<severity>". FindByType shares the scorecard's map
// rather than copying it. Open findings are counted from state.Findings and
// the dimension list is derived via buildDimensions.
func FromQualityState(state *quality.State, projectName, version string) *ScorecardData {
	data := &ScorecardData{
		ProjectName:   projectName,
		Version:       version,
		FindingsTotal: len(state.Findings),
		LastScan:      state.LastScan,
		FindByType:    make(map[string]int),
		FindByTier:    make(map[string]int),
	}

	// Pull scores and breakdowns from the scorecard, when there is one.
	if card := state.Scorecard; card != nil {
		data.OverallScore = float64(card.TotalScore)
		data.StrictScore = float64(card.StrictScore)
		data.FindByType = card.FindingsByType

		tiers := make(map[string]int)
		for sev, count := range card.FindingsByTier {
			tiers[fmt.Sprintf("T%d", sev)] = count
		}
		data.FindByTier = tiers
	}

	data.Grade = GetScoreGrade(int(data.OverallScore))

	// Count open findings.
	for i := range state.Findings {
		if state.Findings[i].Status == quality.StatusOpen {
			data.FindingsOpen++
		}
	}

	data.Dimensions = buildDimensions(state)
	return data
}
// buildDimensions groups the open findings in state by type and turns each
// group into a Dimension.
//
// Every dimension starts at a score of 100 and loses the numeric severity of
// each of its findings, floored at 0; Strict mirrors Score. The result is
// ordered by descending finding count, with ties broken by name so the order
// (and therefore which entries survive the cut) does not depend on map
// iteration order. At most 12 dimensions are returned; the slice is never nil.
func buildDimensions(state *quality.State) []Dimension {
	byType := make(map[string]*Dimension)
	for _, f := range state.Findings {
		if f.Status != quality.StatusOpen {
			continue
		}
		dim, exists := byType[f.Type]
		if !exists {
			dim = &Dimension{
				Name:  formatDimensionName(f.Type),
				Score: 100,
			}
			byType[f.Type] = dim
		}
		dim.Count++
		dim.Score -= float64(f.Severity)
		if dim.Score < 0 {
			dim.Score = 0
		}
	}

	dims := make([]Dimension, 0, len(byType))
	for _, dim := range byType {
		dim.Strict = dim.Score
		dims = append(dims, *dim)
	}

	sort.Slice(dims, func(i, j int) bool {
		if dims[i].Count != dims[j].Count {
			return dims[i].Count > dims[j].Count
		}
		return dims[i].Name < dims[j].Name
	})

	if len(dims) > 12 {
		dims = dims[:12]
	}
	return dims
}
// formatDimensionName converts an internal finding-type identifier into a
// display name.
//
// Known identifiers map to a fixed label (for example "dead_code" becomes
// "Dead Code"). Any other name is returned with its first character
// upper-cased when that character is a lowercase ASCII letter, and unchanged
// otherwise (already capitalized, a digit, non-ASCII, or empty).
func formatDimensionName(name string) string {
	// Map internal names to display names.
	nameMap := map[string]string{
		"complexity":     "Complexity",
		"duplication":    "Duplication",
		"naming":         "Naming",
		"security":       "Security",
		"dead_code":      "Dead Code",
		"unused_import":  "Unused Import",
		"unused_var":     "Unused Variable",
		"god_component":  "God Component",
		"mixed_concerns": "Mixed Concerns",
		"test_coverage":  "Test Coverage",
	}
	if display, ok := nameMap[name]; ok {
		return display
	}
	if name == "" {
		return name
	}

	// Only shift bytes in 'a'..'z'. Subtracting 32 from anything else would
	// produce an unrelated character, or corrupt the lead byte of a
	// multi-byte UTF-8 sequence.
	if first := name[0]; first >= 'a' && first <= 'z' {
		return string(rune(first-'a'+'A')) + name[1:]
	}
	return name
}
+229
View File
@@ -0,0 +1,229 @@
package scorecard
import (
"image"
"image/color"
"image/draw"
"strconv"
"golang.org/x/image/font"
"golang.org/x/image/font/basicfont"
"golang.org/x/image/math/fixed"
)
// DrawContext bundles a target image with the pixel scale factor used by the
// drawing helpers defined on it.
type DrawContext struct {
	Img *image.RGBA // destination image; all drawing writes into it
	Scale int // multiplier applied by S to convert layout units to pixels
}
// NewDrawContext returns a DrawContext that draws into img using the given
// scale factor.
func NewDrawContext(img *image.RGBA, scale int) *DrawContext {
	return &DrawContext{Img: img, Scale: scale}
}
// S scales v by the context's scale factor.
func (dc *DrawContext) S(v int) int {
	return v * dc.Scale
}
// FillRect fills the w×h rectangle whose top-left corner is (x, y) with c.
// Pixels with coordinates outside [0, width) × [0, height) of the image are
// skipped, so the rectangle may extend past the image edges.
func (dc *DrawContext) FillRect(x, y, w, h int, c color.RGBA) {
	imgW, imgH := dc.Img.Bounds().Dx(), dc.Img.Bounds().Dy()

	// Clamp the rectangle once instead of testing every pixel.
	left, right := x, x+w
	if left < 0 {
		left = 0
	}
	if right > imgW {
		right = imgW
	}
	top, bottom := y, y+h
	if top < 0 {
		top = 0
	}
	if bottom > imgH {
		bottom = imgH
	}

	for py := top; py < bottom; py++ {
		for px := left; px < right; px++ {
			dc.Img.Set(px, py, c)
		}
	}
}
// DrawRect strokes the outline of the rectangle with corners (x1, y1) and
// (x2, y2), both inclusive. The border is width pixels thick and grows
// inward from the given edges.
func (dc *DrawContext) DrawRect(x1, y1, x2, y2 int, c color.RGBA, width int) {
	for inset := 0; inset < width; inset++ {
		top, bottom := y1+inset, y2-inset
		left, right := x1+inset, x2-inset

		dc.DrawHLine(x1, top, x2, c)
		dc.DrawHLine(x1, bottom, x2, c)
		dc.DrawVLine(left, y1, y2, c)
		dc.DrawVLine(right, y1, y2, c)
	}
}
// DrawHLine draws a horizontal line on row y spanning x1 through x2
// inclusive; the endpoints may be given in either order. Parts of the line
// outside [0, width) × [0, height) of the image are not drawn.
func (dc *DrawContext) DrawHLine(x1, y, x2 int, c color.RGBA) {
	imgW, imgH := dc.Img.Bounds().Dx(), dc.Img.Bounds().Dy()
	if y < 0 || y >= imgH {
		return
	}

	from, to := x1, x2
	if from > to {
		from, to = to, from
	}
	// Clamp the span to the image instead of testing each pixel.
	if from < 0 {
		from = 0
	}
	if to > imgW-1 {
		to = imgW - 1
	}

	for x := from; x <= to; x++ {
		dc.Img.Set(x, y, c)
	}
}
// DrawVLine draws a vertical line on column x spanning y1 through y2
// inclusive; the endpoints may be given in either order. Parts of the line
// outside [0, width) × [0, height) of the image are not drawn.
func (dc *DrawContext) DrawVLine(x, y1, y2 int, c color.RGBA) {
	imgW, imgH := dc.Img.Bounds().Dx(), dc.Img.Bounds().Dy()
	if x < 0 || x >= imgW {
		return
	}

	from, to := y1, y2
	if from > to {
		from, to = to, from
	}
	// Clamp the span to the image instead of testing each pixel.
	if from < 0 {
		from = 0
	}
	if to > imgH-1 {
		to = imgH - 1
	}

	for y := from; y <= to; y++ {
		dc.Img.Set(x, y, c)
	}
}
// DrawRoundedRect fills a w×h rectangle at (x, y) with corner radius r.
// The body is drawn as two overlapping rectangles forming a plus shape, which
// leaves the four r×r corner squares empty; those are then filled with
// quarter discs.
func (dc *DrawContext) DrawRoundedRect(x, y, w, h, r int, c color.RGBA) {
	// Vertical bar (full height, inset horizontally by r) and horizontal bar
	// (full width, inset vertically by r).
	dc.FillRect(x+r, y, w-2*r, h, c)
	dc.FillRect(x, y+r, w, h-2*r, c)
	// Walk one quadrant of offsets (dx, dy both in [-r, 0]) and mirror each
	// point that lies strictly inside the circle of radius r into all four
	// corners.
	for dy := -r; dy <= 0; dy++ {
		for dx := -r; dx <= 0; dx++ {
			if dx*dx+dy*dy >= r*r {
				continue
			}
			dc.Img.Set(x+r+dx, y+r+dy, c)
			dc.Img.Set(x+w-r-1-dx, y+r+dy, c)
			dc.Img.Set(x+r+dx, y+h-r-1-dy, c)
			dc.Img.Set(x+w-r-1-dx, y+h-r-1-dy, c)
		}
	}
}
// DrawRoundedRectWithOutline fills a rounded rectangle (see DrawRoundedRect)
// and then strokes an outline outlineWidth pixels thick along its straight
// edges.
//
// For the i-th outline ring each edge is shortened by r-i pixels at both ends
// (never less than zero), so the lines stop short of the rounded corners; the
// corner arcs themselves are not stroked.
func (dc *DrawContext) DrawRoundedRectWithOutline(x, y, w, h, r int, fill, outline color.RGBA, outlineWidth int) {
	dc.DrawRoundedRect(x, y, w, h, r, fill)

	for i := 0; i < outlineWidth; i++ {
		// Corner inset for this ring.
		ri := r - i
		if ri < 0 {
			ri = 0
		}
		dc.DrawHLine(x+ri, y+i, x+w-ri-1, outline)
		dc.DrawHLine(x+ri, y+h-i-1, x+w-ri-1, outline)
		dc.DrawVLine(x+i, y+ri, y+h-ri-1, outline)
		dc.DrawVLine(x+w-i-1, y+ri, y+h-ri-1, outline)
	}
}
// DrawDiamond fills a diamond centered on (cx, cy): every pixel whose
// Manhattan distance from the center is at most size. Pixels outside
// [0, width) × [0, height) of the image are skipped.
func (dc *DrawContext) DrawDiamond(cx, cy, size int, c color.RGBA) {
	imgW, imgH := dc.Img.Bounds().Dx(), dc.Img.Bounds().Dy()

	for dy := -size; dy <= size; dy++ {
		py := cy + dy
		if py < 0 || py >= imgH {
			continue
		}
		// Horizontal half-width of the diamond on this row.
		reach := size - abs(dy)
		for dx := -reach; dx <= reach; dx++ {
			px := cx + dx
			if px < 0 || px >= imgW {
				continue
			}
			dc.Img.Set(px, py, c)
		}
	}
}
// DrawRuleWithOrnament draws a horizontal rule on row y from x1 to x2 with a
// diamond ornament at cx. The rule is interrupted for 8 scaled units on each
// side of cx; the diamond has a scaled size of 3.
func (dc *DrawContext) DrawRuleWithOrnament(y, x1, x2, cx int, lineColor, ornamentColor color.RGBA) {
	halfGap := dc.S(8)
	leftEnd, rightStart := cx-halfGap, cx+halfGap

	dc.DrawHLine(x1, y, leftEnd, lineColor)
	dc.DrawHLine(rightStart, y, x2, lineColor)
	dc.DrawDiamond(cx, y, dc.S(3), ornamentColor)
}
// DrawVertRuleWithOrnament draws a vertical rule on column x from y1 to y2
// with a diamond ornament at cy. The rule is interrupted for 8 scaled units
// above and below cy; the diamond has a scaled size of 3.
func (dc *DrawContext) DrawVertRuleWithOrnament(x, y1, y2, cy int, lineColor, ornamentColor color.RGBA) {
	halfGap := dc.S(8)
	upperEnd, lowerStart := cy-halfGap, cy+halfGap

	dc.DrawVLine(x, y1, upperEnd, lineColor)
	dc.DrawVLine(x, lowerStart, y2, lineColor)
	dc.DrawDiamond(x, cy, dc.S(3), ornamentColor)
}
// DrawText draws text in color c using face, with the drawer's dot (the
// starting point on the baseline) placed at pixel position (x, y).
func (dc *DrawContext) DrawText(text string, x, y int, face font.Face, c color.RGBA) {
	origin := fixed.Point26_6{
		X: fixed.I(x),
		Y: fixed.I(y),
	}
	drawer := font.Drawer{
		Dst:  dc.Img,
		Src:  image.NewUniform(c),
		Face: face,
		Dot:  origin,
	}
	drawer.DrawString(text)
}
// DrawCenteredText draws text so that it is horizontally centered on cx, with
// its baseline at y. The width is the measured advance rounded up.
func (dc *DrawContext) DrawCenteredText(text string, cx, y int, face font.Face, c color.RGBA) {
	textW := font.MeasureString(face, text).Ceil()
	left := cx - textW/2
	dc.DrawText(text, left, y, face, c)
}
// DrawRightAlignedText draws text so that it ends at rx, with its baseline at
// y. The width is the measured advance rounded up.
func (dc *DrawContext) DrawRightAlignedText(text string, rx, y int, face font.Face, c color.RGBA) {
	textW := font.MeasureString(face, text).Ceil()
	left := rx - textW
	dc.DrawText(text, left, y, face, c)
}
// FillBackground overwrites the whole image with color c. draw.Src replaces
// the existing pixels rather than blending with them.
func (dc *DrawContext) FillBackground(c color.RGBA) {
	draw.Draw(dc.Img, dc.Img.Bounds(), &image.Uniform{c}, image.Point{}, draw.Src)
}
// DrawDoubleFrame draws two nested rectangular borders. The outer border
// follows (x1, y1)-(x2, y2) and is outerWidth thick; the inner border is
// innerWidth thick and sits outerWidth+2 pixels inside the outer rectangle
// on every side.
func (dc *DrawContext) DrawDoubleFrame(x1, y1, x2, y2 int, outerColor, innerColor color.RGBA, outerWidth, innerWidth int) {
	dc.DrawRect(x1, y1, x2, y2, outerColor, outerWidth)

	inset := outerWidth + 2
	dc.DrawRect(x1+inset, y1+inset, x2-inset, y2-inset, innerColor, innerWidth)
}
// TextWidth returns the advance width of text in face, rounded up to whole
// pixels.
func (dc *DrawContext) TextWidth(text string, face font.Face) int {
	return font.MeasureString(face, text).Ceil()
}
// TextBounds measures text in face. width is the advance rounded up, height
// is the face's ascent plus descent rounded up (independent of the text), and
// offsetY is the negated, rounded-up ascent.
func (dc *DrawContext) TextBounds(text string, face font.Face) (width, height, offsetY int) {
	textAdvance := font.MeasureString(face, text)
	m := face.Metrics()

	lineHeight := m.Ascent + m.Descent
	return textAdvance.Ceil(), lineHeight.Ceil(), -m.Ascent.Ceil()
}
// TruncateText shortens text until it fits within maxWidth pixels when drawn
// with face, appending "…" to mark the cut. Text that already fits is
// returned unchanged. If not even the ellipsis fits, the ellipsis alone is
// returned.
//
// Text is trimmed one whole UTF-8 encoded character at a time, so a
// multi-byte character is never split.
func (dc *DrawContext) TruncateText(text string, maxWidth int, face font.Face) string {
	if dc.TextWidth(text, face) <= maxWidth {
		return text
	}

	ellipsis := "…"
	ellipsisWidth := dc.TextWidth(ellipsis, face)
	for len(text) > 0 {
		// Find the start of the last character: step back over UTF-8
		// continuation bytes (bit pattern 10xxxxxx) to its lead byte.
		cut := len(text) - 1
		for cut > 0 && text[cut]&0xC0 == 0x80 {
			cut--
		}
		text = text[:cut]
		if dc.TextWidth(text, face)+ellipsisWidth <= maxWidth {
			return text + ellipsis
		}
	}
	return ellipsis
}
// GetFont returns the font face used for all scorecard text: the fixed
// basicfont.Face7x13 bitmap face.
func GetFont() font.Face {
	return basicfont.Face7x13
}
// FmtScore formats a score for display: values with no fractional part are
// printed as plain integers, everything else with exactly one decimal place.
func FmtScore(score float64) string {
	whole := int(score)
	if float64(whole) != score {
		return strconv.FormatFloat(score, 'f', 1, 64)
	}
	return strconv.Itoa(whole)
}
// abs returns the absolute value of x.
func abs(x int) int {
	if x >= 0 {
		return x
	}
	return -x
}
// max returns the larger of a and b. Within this package it takes precedence
// over the max builtin that Go 1.21 introduced.
func max(a, b int) int {
	if b < a {
		return a
	}
	return b
}
// min returns the smaller of a and b. Within this package it takes precedence
// over the min builtin that Go 1.21 introduced.
func min(a, b int) int {
	if b > a {
		return a
	}
	return b
}
+136
View File
@@ -0,0 +1,136 @@
package scorecard
import "image/color"
// Scale is the pixel multiplier applied to every layout dimension, so the
// scorecard renders at 2x for retina/high-DPI displays.
const Scale = 2
// Theme colors for the scorecard badge - warm earth-tone palette
var (
	// BG is the main background (warm cream)
	BG = color.RGBA{R: 247, G: 240, B: 228, A: 255}
	// BGScore is the score panel background
	BGScore = color.RGBA{R: 240, G: 232, B: 217, A: 255}
	// BGTable is the table background
	BGTable = color.RGBA{R: 240, G: 233, B: 220, A: 255}
	// BGRowAlt is the alternate row background
	BGRowAlt = color.RGBA{R: 234, G: 226, B: 212, A: 255}
	// TEXT is the main text color (dark brown)
	TEXT = color.RGBA{R: 58, G: 48, B: 38, A: 255}
	// DIM is the dimmed text color (warm gray)
	DIM = color.RGBA{R: 138, G: 122, B: 102, A: 255}
	// BORDER is the inner border color (warm tan)
	BORDER = color.RGBA{R: 192, G: 176, B: 152, A: 255}
	// ACCENT is the accent color (warm brown)
	ACCENT = color.RGBA{R: 148, G: 112, B: 82, A: 255}
	// FRAME is the outer frame color (warm tan)
	FRAME = color.RGBA{R: 172, G: 152, B: 126, A: 255}
)
// Score grade colors - gradient from sage to rose. The score ranges below
// match the thresholds in GetScoreGrade.
var (
	// GradeA is for scores 90-100% (deep sage green)
	GradeA = color.RGBA{R: 68, G: 120, B: 68, A: 255}
	// GradeB is for scores 70-89% (olive green)
	GradeB = color.RGBA{R: 120, G: 140, B: 72, A: 255}
	// GradeC is for scores 50-69% (yellow-green)
	GradeC = color.RGBA{R: 145, G: 155, B: 80, A: 255}
	// GradeD is for scores 30-49% (mustard)
	GradeD = color.RGBA{R: 180, G: 150, B: 70, A: 255}
	// GradeF is for scores 0-29% (dusty rose)
	GradeF = color.RGBA{R: 170, G: 110, B: 90, A: 255}
)
// Muted score colors for strict column (pastel orange/peach shades)
var (
	// GradeAMuted is muted version of GradeA
	GradeAMuted = color.RGBA{R: 195, G: 160, B: 115, A: 255} // light sandy peach
	// GradeBMuted is muted version of GradeB
	GradeBMuted = color.RGBA{R: 200, G: 148, B: 100, A: 255} // warm apricot
	// GradeCMuted is muted version of GradeC
	GradeCMuted = color.RGBA{R: 195, G: 125, B: 95, A: 255} // soft coral
	// GradeDMuted is muted version of GradeD
	GradeDMuted = color.RGBA{R: 190, G: 130, B: 100, A: 255}
	// GradeFMuted is muted version of GradeF
	GradeFMuted = color.RGBA{R: 185, G: 120, B: 100, A: 255}
)
// Severity colors for findings, one per tier; GetSeverityColor maps the
// tier numbers 1-4 onto them.
var (
	// SeverityT1Color is the color for tier 1 findings.
	SeverityT1Color = color.RGBA{R: 100, G: 180, B: 255, A: 255}
	// SeverityT2Color is the color for tier 2 findings.
	SeverityT2Color = color.RGBA{R: 255, G: 200, B: 100, A: 255}
	// SeverityT3Color is the color for tier 3 findings.
	SeverityT3Color = color.RGBA{R: 255, G: 140, B: 80, A: 255}
	// SeverityT4Color is the color for tier 4 findings.
	SeverityT4Color = color.RGBA{R: 255, G: 80, B: 80, A: 255}
)
// GetGradeColor returns the display color for a letter grade. "A" through
// "D" map to their dedicated colors; any other value, including "F", maps to
// GradeF.
func GetGradeColor(grade string) color.RGBA {
	if grade == "A" {
		return GradeA
	}
	if grade == "B" {
		return GradeB
	}
	if grade == "C" {
		return GradeC
	}
	if grade == "D" {
		return GradeD
	}
	return GradeF
}
// GetGradeColorMuted returns the muted display color for a letter grade, as
// used for strict-score values. "A" through "D" map to their dedicated muted
// colors; any other value, including "F", maps to GradeFMuted.
func GetGradeColorMuted(grade string) color.RGBA {
	if grade == "A" {
		return GradeAMuted
	}
	if grade == "B" {
		return GradeBMuted
	}
	if grade == "C" {
		return GradeCMuted
	}
	if grade == "D" {
		return GradeDMuted
	}
	return GradeFMuted
}
// GetScoreGrade maps a numeric score to a letter grade: 90 and above is "A",
// 70-89 is "B", 50-69 is "C", 30-49 is "D", and anything lower is "F".
func GetScoreGrade(score int) string {
	if score >= 90 {
		return "A"
	}
	if score >= 70 {
		return "B"
	}
	if score >= 50 {
		return "C"
	}
	if score >= 30 {
		return "D"
	}
	return "F"
}
func GetScoreColor(score int) color.RGBA {
return GetGradeColor(GetScoreGrade(score))
}
func GetScoreColorMuted(score int) color.RGBA {
return GetGradeColorMuted(GetScoreGrade(score))
}
// GetSeverityColor returns the color for a severity tier number. Tiers 1
// through 4 have dedicated colors; any other value yields DIM.
func GetSeverityColor(severity int) color.RGBA {
	// Index i holds the color for tier i+1.
	tierColors := [...]color.RGBA{
		SeverityT1Color,
		SeverityT2Color,
		SeverityT3Color,
		SeverityT4Color,
	}
	if severity < 1 || severity > len(tierColors) {
		return DIM
	}
	return tierColors[severity-1]
}
// ScaleValue converts a layout dimension to pixels by multiplying it by the
// package-level Scale factor.
func ScaleValue(v int) int {
	scaled := v * Scale
	return scaled
}
+203
View File
@@ -0,0 +1,203 @@
package quality
import (
"fmt"
"time"
)
// Scorer calculates quality scores from findings and builds and formats
// scorecards.
type Scorer struct {
	targetScore int // copied into every generated Scorecard as TargetScore
}
// NewScorer returns a Scorer with the given target score. A target of zero
// or less selects the default of 95.
func NewScorer(targetScore int) *Scorer {
	const defaultTarget = 95

	scorer := &Scorer{targetScore: defaultTarget}
	if targetScore > 0 {
		scorer.targetScore = targetScore
	}
	return scorer
}
// CalculateScore sums Score × Severity over all findings. The first result
// covers every finding; the second ("strict") covers only findings whose
// status is open or wontfix.
func (s *Scorer) CalculateScore(findings []Finding) (int, int) {
	var total, strict int
	for i := range findings {
		f := &findings[i]
		weighted := f.Score * int(f.Severity)
		total += weighted

		// Strict score includes open and wontfix findings.
		countsForStrict := f.Status == StatusOpen || f.Status == StatusWontfix
		if countsForStrict {
			strict += weighted
		}
	}
	return total, strict
}
// GenerateScorecard builds a Scorecard from findings: the total and strict
// scores, the scorer's target, and finding counts grouped by type, by
// severity tier, and by status. Despite its name, StatusByType is keyed by
// the status string. lastScan is stored unchanged.
func (s *Scorer) GenerateScorecard(findings []Finding, lastScan time.Time) *Scorecard {
	card := &Scorecard{
		TargetScore:    s.targetScore,
		FindingsByType: make(map[string]int),
		FindingsByTier: make(map[Severity]int),
		StatusByType:   make(map[string]int),
		LastScan:       lastScan,
	}
	card.TotalScore, card.StrictScore = s.CalculateScore(findings)

	// Tally each finding into the three breakdowns.
	for i := range findings {
		f := &findings[i]
		card.FindingsByType[f.Type]++
		card.FindingsByTier[f.Severity]++
		card.StatusByType[string(f.Status)]++
	}
	return card
}
// GetHealthGrade converts a debt score into a letter grade. The score is
// first mapped to a health percentage (see getScorePercentage); 90 and above
// is "A", 80-89 "B", 70-79 "C", 60-69 "D", and anything lower "F".
func (s *Scorer) GetHealthGrade(score int) string {
	pct := s.getScorePercentage(score)

	// Bands in descending order; the first floor reached wins.
	bands := []struct {
		floor int
		grade string
	}{
		{90, "A"},
		{80, "B"},
		{70, "C"},
		{60, "D"},
	}
	for _, band := range bands {
		if pct >= band.floor {
			return band.grade
		}
	}
	return "F"
}
// getScorePercentage turns a debt score into a health percentage, where a
// lower score means better health. The score is normalized against a fixed
// ceiling of 1000 and subtracted from 100; results below zero are reported
// as 0. There is no upper clamp.
func (s *Scorer) getScorePercentage(score int) int {
	// Arbitrary high value used for normalization.
	const maxPossibleScore = 1000

	pct := 100 - (score * 100 / maxPossibleScore)
	if pct >= 0 {
		return pct
	}
	return 0
}
// FormatScorecard renders card as a multi-line, human-readable report: a
// header with the health grade and percentage (both derived from the strict
// score), the target and current scores, and then the finding counts by
// type, by severity tier, and by status, followed by the last scan time.
//
// The three breakdown sections iterate over maps, so the order of lines
// within each section can differ between calls.
func (s *Scorer) FormatScorecard(card *Scorecard) string {
	grade := s.GetHealthGrade(card.StrictScore)
	percentage := s.getScorePercentage(card.StrictScore)
	output := fmt.Sprintf(`
Code Quality Scorecard
=======================================
Overall Health: %s (%d%%)
Target Score: %d
Current Score: %d (strict: %d)
Findings by Type:
`, grade, percentage, card.TargetScore, card.TotalScore, card.StrictScore)
	for ftype, count := range card.FindingsByType {
		output += fmt.Sprintf(" - %s: %d\n", ftype, count)
	}
	output += "\nFindings by Severity:\n"
	// Display labels per tier; tiers without a label are left out of the report.
	tierNames := map[Severity]string{
		SeverityT1: "T1 (Auto-fixable)",
		SeverityT2: "T2 (Quick manual)",
		SeverityT3: "T3 (Needs judgment)",
		SeverityT4: "T4 (Major refactor)",
	}
	for severity, count := range card.FindingsByTier {
		if name, ok := tierNames[severity]; ok {
			output += fmt.Sprintf(" - %s: %d\n", name, count)
		}
	}
	output += "\nStatus Breakdown:\n"
	for status, count := range card.StatusByType {
		output += fmt.Sprintf(" - %s: %d\n", status, count)
	}
	output += fmt.Sprintf("\nLast Scan: %s\n", card.LastScan.Format("2006-01-02 15:04:05"))
	return output
}
// GetNextPriority returns the open finding with the largest Severity × Score
// weight, or nil when findings is empty or no open finding has a positive
// weight. On ties the earliest such finding in the slice wins.
//
// The result points to a copy, so modifying it does not alter the input
// slice.
func (s *Scorer) GetNextPriority(findings []Finding) *Finding {
	bestIdx := -1
	bestWeight := 0
	for i := range findings {
		if findings[i].Status != StatusOpen {
			continue
		}
		weight := int(findings[i].Severity) * findings[i].Score
		if weight > bestWeight {
			bestWeight = weight
			bestIdx = i
		}
	}
	if bestIdx < 0 {
		return nil
	}

	// Track the index and copy once at the end. Taking the address of a
	// range variable would, before Go 1.22, alias the single shared loop
	// variable and end up pointing at the last element iterated.
	best := findings[bestIdx]
	return &best
}
// GetFindingsByTier groups the open findings by severity tier, preserving
// their input order within each tier. Findings with any other status are
// left out.
func (s *Scorer) GetFindingsByTier(findings []Finding) map[Severity][]Finding {
	grouped := make(map[Severity][]Finding)
	for i := range findings {
		if findings[i].Status != StatusOpen {
			continue
		}
		tier := findings[i].Severity
		grouped[tier] = append(grouped[tier], findings[i])
	}
	return grouped
}
// GetProgressMetrics summarizes findings by status.
//
// The returned map holds the int counts "total", "open", "fixed" and
// "wontfix", plus "progress": the percentage of findings that are fixed, as a
// float64. For an empty input progress is 0 rather than NaN, which keeps the
// map encodable as JSON.
func (s *Scorer) GetProgressMetrics(findings []Finding) map[string]interface{} {
	total := len(findings)
	open := 0
	fixed := 0
	wontfix := 0
	for i := range findings {
		switch findings[i].Status {
		case StatusOpen:
			open++
		case StatusFixed:
			fixed++
		case StatusWontfix:
			wontfix++
		}
	}

	// Guard the division: 0/0 in floating point is NaN.
	progress := 0.0
	if total > 0 {
		progress = float64(fixed) / float64(total) * 100
	}

	return map[string]interface{}{
		"total":    total,
		"open":     open,
		"fixed":    fixed,
		"wontfix":  wontfix,
		"progress": progress,
	}
}
+371
View File
@@ -0,0 +1,371 @@
package quality
import (
"crypto/sha256"
"encoding/json"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"time"
)
// StateManager manages quality analysis state with diff tracking. It reads
// and writes the state file and keeps per-scan snapshots in a history
// directory.
type StateManager struct {
	dataDir string // root directory for all persisted data
	stateFile string // path of the current state: <dataDir>/state.json
	historyDir string // directory of snapshot files: <dataDir>/history
}
// State represents the persisted quality state
type State struct {
	Findings []Finding `json:"findings"` // findings from the most recent merge
	Scorecard *Scorecard `json:"scorecard"` // may be nil, e.g. for a state that has never been saved
	LastScan time.Time `json:"last_scan"` // set by Merge
	ScanCount int `json:"scan_count"` // incremented by Merge
	ContentHash string `json:"content_hash"` // hash of Findings, recomputed on every Save
	History []StateSnapshot `json:"history,omitempty"` // snapshot index, capped at the 50 most recent
	Metadata map[string]string `json:"metadata,omitempty"` // free-form key/value data
}
// StateSnapshot represents a historical state snapshot: a summary of the
// state at save time plus the name of the file holding the full copy.
type StateSnapshot struct {
	Timestamp time.Time `json:"timestamp"` // when the snapshot was taken
	Hash string `json:"hash"` // content hash of the state's findings
	Score int `json:"score"` // total score at that time
	StrictScore int `json:"strict_score"` // strict score at that time
	Findings int `json:"findings"` // number of findings at that time
	File string `json:"file"` // snapshot file name inside the history directory
}
// StateDiff represents the difference between two states
type StateDiff struct {
	Added []Finding `json:"added"` // present only in the newer set
	Removed []Finding `json:"removed"` // present only in the older set (filled by Diff)
	Changed []Finding `json:"changed"` // same ID in both, but not equal
	Resolved []Finding `json:"resolved"` // open findings that disappeared (filled by Merge)
	Regressions []Finding `json:"regressions"` // changed findings that went from non-open to open (filled by Diff)
}
// NewStateManager returns a StateManager rooted at dataDir. The state file is
// <dataDir>/state.json and snapshots go under <dataDir>/history. Nothing is
// created on disk here.
func NewStateManager(dataDir string) *StateManager {
	sm := &StateManager{dataDir: dataDir}
	sm.stateFile = filepath.Join(dataDir, "state.json")
	sm.historyDir = filepath.Join(dataDir, "history")
	return sm
}
// Load reads the current state from the state file. When the file does not
// exist, a fresh state with an empty findings list and an empty metadata map
// is returned instead of an error. Read failures of any other kind, and
// files that do not parse as JSON, are returned as errors.
func (sm *StateManager) Load() (*State, error) {
	raw, err := os.ReadFile(sm.stateFile)
	switch {
	case err == nil:
		// Fall through to decoding below.
	case os.IsNotExist(err):
		fresh := &State{
			Findings: []Finding{},
			Metadata: make(map[string]string),
		}
		return fresh, nil
	default:
		return nil, fmt.Errorf("failed to read state: %w", err)
	}

	loaded := new(State)
	if err := json.Unmarshal(raw, loaded); err != nil {
		return nil, fmt.Errorf("failed to parse state: %w", err)
	}
	return loaded, nil
}
// Save persists state to the state file.
//
// It recomputes state.ContentHash, records a history snapshot (a snapshot
// failure is reported on stderr but does not fail the save), and then writes
// the JSON to a temporary file that is renamed over the real one, so readers
// never see a half-written state. If the write or the rename fails, the
// temporary file is removed on a best-effort basis.
func (sm *StateManager) Save(state *State) error {
	// Ensure directory exists.
	if err := os.MkdirAll(sm.dataDir, 0755); err != nil {
		return fmt.Errorf("failed to create data directory: %w", err)
	}

	// Calculate content hash.
	state.ContentHash = sm.calculateHash(state.Findings)

	// Save history snapshot; history is auxiliary, so warn but don't fail.
	if err := sm.saveHistory(state); err != nil {
		fmt.Fprintf(os.Stderr, "Warning: failed to save history: %v\n", err)
	}

	data, err := json.MarshalIndent(state, "", " ")
	if err != nil {
		return fmt.Errorf("failed to marshal state: %w", err)
	}

	// Write to temp file first.
	tmpFile := sm.stateFile + ".tmp"
	if err := os.WriteFile(tmpFile, data, 0644); err != nil {
		os.Remove(tmpFile) // best effort: don't leave a partial file behind
		return fmt.Errorf("failed to write state: %w", err)
	}

	// Rename to final location (atomic on most filesystems).
	if err := os.Rename(tmpFile, sm.stateFile); err != nil {
		os.Remove(tmpFile) // best effort: the rename error is the one to report
		return fmt.Errorf("failed to rename state file: %w", err)
	}
	return nil
}
// Merge replaces the findings in state with newFindings and reports what
// changed relative to the previous findings.
//
// Findings are matched by ID. The returned diff lists findings that are new
// (Added), findings whose ID already existed but that are no longer equal
// (Changed), and previously open findings that are absent from newFindings
// (Resolved). Removed and Regressions are always empty here. Every slice in
// the diff is non-nil, matching Diff, so each one encodes as a JSON array.
//
// Merge also sets state.LastScan to the current time and increments
// state.ScanCount.
func (sm *StateManager) Merge(state *State, newFindings []Finding) *StateDiff {
	diff := &StateDiff{
		Added:       []Finding{},
		Removed:     []Finding{},
		Changed:     []Finding{},
		Resolved:    []Finding{},
		Regressions: []Finding{},
	}

	// Create lookup maps.
	existingMap := make(map[string]Finding)
	for _, f := range state.Findings {
		existingMap[f.ID] = f
	}
	newMap := make(map[string]Finding)
	for _, f := range newFindings {
		newMap[f.ID] = f
	}

	// Find added and changed findings.
	for _, incoming := range newFindings {
		existing, ok := existingMap[incoming.ID]
		if !ok {
			diff.Added = append(diff.Added, incoming)
			continue
		}
		if !findingsEqual(existing, incoming) {
			diff.Changed = append(diff.Changed, incoming)
		}
	}

	// Open findings that vanished from the new scan count as resolved.
	for _, existing := range state.Findings {
		if _, ok := newMap[existing.ID]; !ok {
			if existing.Status == StatusOpen {
				diff.Resolved = append(diff.Resolved, existing)
			}
		}
	}

	// Update state.
	state.Findings = newFindings
	state.LastScan = time.Now()
	state.ScanCount++
	return diff
}
// Diff compares two states without modifying either. Findings are matched by
// ID: those only in new are Added, those only in old are Removed, and those
// in both but not equal are Changed. A changed finding that was not open in
// old and is open in new is also listed under Regressions. Resolved is always
// empty here.
func (sm *StateManager) Diff(old, new *State) *StateDiff {
	result := &StateDiff{
		Added:       []Finding{},
		Removed:     []Finding{},
		Changed:     []Finding{},
		Resolved:    []Finding{},
		Regressions: []Finding{},
	}

	previous := make(map[string]Finding)
	for _, f := range old.Findings {
		previous[f.ID] = f
	}
	current := make(map[string]Finding)
	for _, f := range new.Findings {
		current[f.ID] = f
	}

	for _, cur := range new.Findings {
		prev, known := previous[cur.ID]
		if !known {
			result.Added = append(result.Added, cur)
			continue
		}
		if findingsEqual(prev, cur) {
			continue
		}
		result.Changed = append(result.Changed, cur)
		// Check for regression (resolved -> open).
		if prev.Status != StatusOpen && cur.Status == StatusOpen {
			result.Regressions = append(result.Regressions, cur)
		}
	}

	for _, prev := range old.Findings {
		if _, stillThere := current[prev.ID]; stillThere {
			continue
		}
		result.Removed = append(result.Removed, prev)
	}
	return result
}
// calculateHash returns a short content hash of findings: the first 16 hex
// characters of the SHA-256 of their JSON encoding. The findings are sorted
// by ID first so the hash does not depend on input order; the sort is done
// in place, so the caller's slice is reordered.
func (sm *StateManager) calculateHash(findings []Finding) string {
	byID := func(a, b int) bool {
		return findings[a].ID < findings[b].ID
	}
	sort.Slice(findings, byID)

	// The marshal error is discarded; on failure the hash is computed over
	// whatever bytes Marshal returned.
	encoded, _ := json.Marshal(findings)
	digest := sha256.Sum256(encoded)
	hexDigest := fmt.Sprintf("%x", digest)
	return hexDigest[:16]
}
// saveHistory writes a full copy of state to
// <historyDir>/<ContentHash>.json and appends a summary entry to
// state.History.
//
// The history is capped at the 50 most recent entries; snapshot files that
// belong to dropped entries are deleted on a best-effort basis. A state
// without a scorecard is recorded with zero scores. An error is returned if
// the history directory cannot be created, the state cannot be encoded, or
// the snapshot file cannot be written; state.History is left untouched in
// those cases.
func (sm *StateManager) saveHistory(state *State) error {
	if err := os.MkdirAll(sm.historyDir, 0755); err != nil {
		return err
	}

	// Create snapshot.
	snapshot := StateSnapshot{
		Timestamp: time.Now(),
		Hash:      state.ContentHash,
		Findings:  len(state.Findings),
		File:      fmt.Sprintf("%s.json", state.ContentHash),
	}
	// Scorecard is nil for a state that was just created by Load.
	if state.Scorecard != nil {
		snapshot.Score = state.Scorecard.TotalScore
		snapshot.StrictScore = state.Scorecard.StrictScore
	}

	// Save snapshot file.
	snapshotData, err := json.MarshalIndent(state, "", " ")
	if err != nil {
		return err
	}
	snapshotFile := filepath.Join(sm.historyDir, snapshot.File)
	if err := os.WriteFile(snapshotFile, snapshotData, 0644); err != nil {
		return err
	}

	// Update history in state (keep last 50 snapshots).
	state.History = append(state.History, snapshot)
	if len(state.History) > 50 {
		// Remove old snapshots.
		for _, old := range state.History[:len(state.History)-50] {
			oldFile := filepath.Join(sm.historyDir, old.File)
			os.Remove(oldFile) // best effort; a leftover file is harmless
		}
		state.History = state.History[len(state.History)-50:]
	}
	return nil
}
// ResolveFinding sets the status of the first finding in state whose ID
// matches id, stamps its UpdatedAt with the current time, and stores note
// under the "resolution_note" metadata key (creating the metadata map when
// it is nil). It returns an error when no finding has the given ID.
func (sm *StateManager) ResolveFinding(state *State, id string, status Status, note string) error {
	for idx := range state.Findings {
		target := &state.Findings[idx]
		if target.ID != id {
			continue
		}

		target.Status = status
		target.UpdatedAt = time.Now()
		if target.Metadata == nil {
			target.Metadata = make(map[string]string)
		}
		target.Metadata["resolution_note"] = note
		return nil
	}

	return fmt.Errorf("finding not found: %s", id)
}
// GetFinding returns a pointer to a copy of the first finding in state whose
// ID matches id, or nil when there is none. Because the result is a copy,
// modifying it does not alter state.Findings.
func (sm *StateManager) GetFinding(state *State, id string) *Finding {
	for idx := range state.Findings {
		if state.Findings[idx].ID != id {
			continue
		}
		match := state.Findings[idx] // detach from the backing slice
		return &match
	}
	return nil
}
// GetOpenFindings returns, in their original order, the findings in state
// whose status is StatusOpen. The result is nil when none are open.
func (sm *StateManager) GetOpenFindings(state *State) []Finding {
	var result []Finding
	for idx := range state.Findings {
		if candidate := state.Findings[idx]; candidate.Status == StatusOpen {
			result = append(result, candidate)
		}
	}
	return result
}
// GetFindingsByTier groups the findings in state by their Severity. Each
// group keeps the relative order the findings have in state.Findings; tiers
// without findings have no entry in the returned map.
func (sm *StateManager) GetFindingsByTier(state *State) map[Severity][]Finding {
	grouped := make(map[Severity][]Finding)
	for idx := range state.Findings {
		item := state.Findings[idx]
		tier := item.Severity
		grouped[tier] = append(grouped[tier], item)
	}
	return grouped
}
// GetTrend returns the last n snapshots recorded in state.History.
//
// When fewer than n snapshots exist the whole history is returned. A
// negative n is treated as 0 and yields an empty result; without this guard
// the slice expression below would index past the end of the history and
// panic. The returned slice shares its backing array with state.History.
func (sm *StateManager) GetTrend(state *State, n int) []StateSnapshot {
	if n < 0 {
		n = 0
	}
	total := len(state.History)
	if total < n {
		return state.History
	}
	return state.History[total-n:]
}
// findingsEqual reports whether a and b agree on ID, Type, Title, File, Line,
// Severity, Score and Status. Description, EndLine, Metadata and the
// timestamps are not part of the comparison.
func findingsEqual(a, b Finding) bool {
	switch {
	case a.ID != b.ID,
		a.Type != b.Type,
		a.Title != b.Title,
		a.File != b.File,
		a.Line != b.Line,
		a.Severity != b.Severity,
		a.Score != b.Score,
		a.Status != b.Status:
		return false
	}
	return true
}
// FormatDiff renders diff as human-readable text.
//
// Each non-empty category (Added, Removed, Changed, Resolved, Regressions, in
// that order) produces a heading line with the number of findings followed
// by one line per finding showing its ID and title. When every category is
// empty the result is "No changes detected\n".
func FormatDiff(diff *StateDiff) string {
	var out strings.Builder

	// section writes a heading and its findings, skipping empty categories.
	section := func(heading string, items []Finding) {
		if len(items) == 0 {
			return
		}
		out.WriteString(heading)
		for _, item := range items {
			fmt.Fprintf(&out, " - %s: %s\n", item.ID, item.Title)
		}
	}

	section(fmt.Sprintf("[+] Added: %d findings\n", len(diff.Added)), diff.Added)
	section(fmt.Sprintf("[-] Removed: %d findings\n", len(diff.Removed)), diff.Removed)
	section(fmt.Sprintf("[~] Changed: %d findings\n", len(diff.Changed)), diff.Changed)
	section(fmt.Sprintf("[OK] Resolved: %d findings\n", len(diff.Resolved)), diff.Resolved)
	section(fmt.Sprintf("[!] Regressions: %d findings\n", len(diff.Regressions)), diff.Regressions)

	if out.Len() == 0 {
		return "No changes detected\n"
	}
	return out.String()
}
+111
View File
@@ -0,0 +1,111 @@
package quality
import (
"time"
)
// Severity is the tier of a finding. Tiers are numbered from 1 (SeverityT1)
// through 4 (SeverityT4); the zero value is not one of the declared tiers.
type Severity int
const (
SeverityT1 Severity = iota + 1 // Tier 1: auto-fixable (unused imports, debug logs)
SeverityT2 // Tier 2: quick manual fix (unused vars, dead exports)
SeverityT3 // Tier 3: needs judgment (near-duplicates, single-use abstractions)
SeverityT4 // Tier 4: major refactor (god components, mixed concerns)
)
// Status is the lifecycle state of a finding. Its string value is what gets
// written to JSON.
type Status string
const (
// StatusOpen marks a finding that is still outstanding.
StatusOpen Status = "open"
// StatusFixed marks a finding that has been fixed.
StatusFixed Status = "fixed"
// StatusWontfix marks a finding that will deliberately not be fixed.
StatusWontfix Status = "wontfix"
// StatusFalsePositive marks a finding judged not to be a real issue.
StatusFalsePositive Status = "false_positive"
// StatusIgnored marks a finding that is being ignored.
StatusIgnored Status = "ignored"
)
// Finding describes a single code quality issue and its current status.
type Finding struct {
ID string `json:"id"` // identifier used to match findings between states
Type string `json:"type"` // kind of issue — presumably the detector name; TODO confirm
Title string `json:"title"` // short summary shown in formatted diffs
Description string `json:"description"` // longer explanation of the issue
File string `json:"file"` // file the issue was found in
Line int `json:"line"` // line of the issue in File
EndLine int `json:"end_line,omitempty"` // last line of the issue; omitted from JSON when zero
Severity Severity `json:"severity"` // tier of the finding
Score int `json:"score"` // numeric score; scale not visible here — TODO confirm
Status Status `json:"status"` // lifecycle state
Metadata map[string]string `json:"metadata,omitempty"` // free-form key/value data, e.g. "resolution_note"
CreatedAt time.Time `json:"created_at"` // creation timestamp
UpdatedAt time.Time `json:"updated_at"` // set when the status is changed via ResolveFinding
}
// FunctionInfo holds the data extracted for one function or method.
type FunctionInfo struct {
Name string `json:"name"` // function or method name
File string `json:"file"` // file containing the function
Line int `json:"line"` // first line of the function
EndLine int `json:"end_line"` // last line of the function
LOC int `json:"loc"` // lines of code
Body string `json:"body"` // function body text
Normalized string `json:"normalized"` // normalized form of the body; normalization rules live elsewhere
BodyHash string `json:"body_hash"` // hash of the body — presumably for duplicate detection; TODO confirm
Params []string `json:"params"` // parameter names or declarations
ReturnAnnotation string `json:"return_annotation,omitempty"` // declared return type, if any
}
// ClassInfo holds the data extracted for one class or component.
type ClassInfo struct {
Name string `json:"name"` // class or component name
File string `json:"file"` // file containing the class
Line int `json:"line"` // line where the class starts
LOC int `json:"loc"` // lines of code
Methods []FunctionInfo `json:"methods"` // methods declared on the class
Attributes []string `json:"attributes"` // attribute names
BaseClasses []string `json:"base_classes"` // names of base classes
Metrics map[string]int `json:"metrics"` // named integer metrics; the key set is defined by the producer
}
// ScanResult is the outcome of one quality scan.
type ScanResult struct {
Findings []Finding `json:"findings"` // issues reported by the scan
Score int `json:"score"` // overall score
StrictScore int `json:"strict_score"` // score under the strict rules; definition not visible here
FilesChecked int `json:"files_checked"` // number of files examined
Duration string `json:"duration"` // scan duration as text
Timestamp time.Time `json:"timestamp"` // time associated with the scan
}
// Scorecard summarises the health of a scanned codebase: its scores and
// finding counts broken down by type, tier and status.
type Scorecard struct {
TotalScore int `json:"total_score"` // overall score; copied into history snapshots
StrictScore int `json:"strict_score"` // strict score; copied into history snapshots
TargetScore int `json:"target_score"` // score being aimed for
FindingsByType map[string]int `json:"findings_by_type"` // finding counts keyed by finding type
FindingsByTier map[Severity]int `json:"findings_by_tier"` // finding counts keyed by severity tier
StatusByType map[string]int `json:"status_by_type"` // counts keyed by string; exact key format not visible here
LastScan time.Time `json:"last_scan"` // time of the most recent scan
}
// Language describes how a programming language is recognised.
type Language struct {
Name string `json:"name"` // language name
Extensions []string `json:"extensions"` // file extensions belonging to the language
MarkerFiles []string `json:"marker_files"` // files that presumably signal a project of this language; TODO confirm
DefaultSrc string `json:"default_src"` // default source directory
}
// Config holds the options of a quality analysis run. Every field except
// Path is omitted from JSON when it has its zero value.
type Config struct {
Path string `json:"path"` // path to analyse
Language string `json:"language,omitempty"` // language to analyse
Exclude []string `json:"exclude,omitempty"` // entries to exclude; matching rules not visible here
Threshold int `json:"threshold,omitempty"` // numeric threshold; meaning defined by the consumer
MinLOC int `json:"min_loc,omitempty"` // minimum lines of code; presumably a size filter — TODO confirm
TargetScore int `json:"target_score,omitempty"` // score being aimed for
ResetSubjective bool `json:"reset_subjective,omitempty"` // flag consumed elsewhere; effect not visible here
NoBadge bool `json:"no_badge,omitempty"` // presumably disables badge output; TODO confirm
BadgePath string `json:"badge_path,omitempty"` // presumably where the badge is written; TODO confirm
}
+70
View File
@@ -0,0 +1,70 @@
// Package scheduler provides automatic update scheduling.
package scheduler
import (
"context"
"time"
"github.com/robfig/cron/v3"
"github.com/yourorg/devour/pkg/types"
)
// Config holds scheduler configuration.
type Config struct {
Enabled bool `yaml:"enabled"` // when false, Start returns without scheduling anything
Interval time.Duration `yaml:"interval"` // period between sync runs; Start falls back to 72h when not positive
CheckMethod string `yaml:"check_method"` // not read by the scheduler code shown in this file
OnStartup bool `yaml:"on_startup"` // not read by the scheduler code shown in this file
}
// Scheduler manages automatic source updates by running a periodic sync job
// on a cron instance.
type Scheduler struct {
config *Config // scheduler settings supplied to New
cron *cron.Cron // cron instance that runs the sync job
sources []*types.Source // sources registered through AddSource
}
// New returns a Scheduler that uses config and owns a fresh cron instance
// created with the seconds field enabled. No job is registered until Start
// is called.
func New(config *Config) *Scheduler {
	scheduler := new(Scheduler)
	scheduler.config = config
	scheduler.cron = cron.New(cron.WithSeconds())
	return scheduler
}
// AddSource appends source to the list of monitored sources. The slice is
// modified without any locking.
func (s *Scheduler) AddSource(source *types.Source) {
s.sources = append(s.sources, source)
}
// Start registers the periodic sync job and starts the cron instance.
//
// When the scheduler is disabled in its configuration, Start does nothing
// and returns nil. The job runs every config.Interval, or every 72 hours
// when the interval is not positive, and calls syncAll with ctx.
//
// It returns the error reported by cron when the schedule cannot be
// registered; in that case the cron instance is not started.
func (s *Scheduler) Start(ctx context.Context) error {
	if !s.config.Enabled {
		return nil
	}

	// Schedule sync job
	schedule := "@every 72h"
	if s.config.Interval > 0 {
		schedule = "@every " + s.config.Interval.String()
	}

	// Surface registration failures instead of silently running a scheduler
	// that has no job.
	if _, err := s.cron.AddFunc(schedule, func() {
		s.syncAll(ctx)
	}); err != nil {
		return err
	}

	s.cron.Start()
	return nil
}
// Stop halts the scheduler by stopping the underlying cron instance. The
// value returned by cron's Stop is discarded.
func (s *Scheduler) Stop() {
s.cron.Stop()
}
// syncAll is the job body invoked by the scheduled cron entry. It is
// currently a placeholder and performs no work.
func (s *Scheduler) syncAll(ctx context.Context) {
// TODO: Implement sync logic
// For each source:
// 1. Check for changes
// 2. If changed, re-scrape and re-index
}
+156
View File
@@ -0,0 +1,156 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/astrodocs"
)
// AstroDocsScraper fetches an Astro documentation page over HTTP and converts
// it into Document values.
type AstroDocsScraper struct {
config *Config // scraper settings; Timeout and UserAgent are read in this file
parser *astrodocs.Parser // parses fetched HTML into an astrodocs.Page
client *http.Client // HTTP client created with config.Timeout
}
// NewAstroDocsScraper builds an AstroDocsScraper that uses config, a new
// astrodocs parser, and an HTTP client whose timeout is config.Timeout.
// config must not be nil.
func NewAstroDocsScraper(config *Config) *AstroDocsScraper {
	scraper := &AstroDocsScraper{
		config: config,
		parser: astrodocs.NewParser(),
	}
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads source.URL, parses it as an Astro docs page, and returns
// one document for the page followed by one document per page section.
//
// It returns an error when source.URL is empty, when the page cannot be
// fetched, or when it cannot be parsed.
func (s *AstroDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Astro docs scraper")
	}

	raw, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	parsed, err := s.parser.ParsePage(raw, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse Astro docs page: %w", err)
	}

	docs := []*Document{s.pageToDocument(parsed, source.Name)}
	for _, sec := range parsed.Sections {
		docs = append(docs, s.sectionToDocument(sec, parsed, source.Name))
	}
	return docs, nil
}
// DetectChanges fetches source.URL and hashes the raw response body. It
// returns whether that hash differs from lastHash, together with the new
// hash. A fetch failure is returned unchanged with an empty hash.
func (s *AstroDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	body, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", err
	}

	current := s.generateHash(body)
	return current != lastHash, current, nil
}
// fetchPage issues a GET request for url with the configured User-Agent and
// returns the response body as a string. Any status other than 200 OK is
// reported as an error of the form "HTTP <code>".
func (s *AstroDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	request.Header.Set("User-Agent", s.config.UserAgent)

	response, err := s.client.Do(request)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", code)
	}

	payload, err := io.ReadAll(response.Body)
	if err != nil {
		return "", err
	}
	return string(payload), nil
}
// generateHash returns the lowercase hex encoding of the SHA-256 digest of
// content.
func (s *AstroDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// pageToDocument renders page as Markdown (title heading, description and,
// when present, a "Code Examples" section with one fenced block per code
// block) and wraps it in a Document of type "astro-docs". The document hash
// is computed over the rendered Markdown.
func (s *AstroDocsScraper) pageToDocument(page *astrodocs.Page, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# %s\n\n", page.Title)
	fmt.Fprintf(&md, "%s\n", page.Description)
	if len(page.CodeBlocks) > 0 {
		md.WriteString("\n## Code Examples\n")
		for _, block := range page.CodeBlocks {
			fmt.Fprintf(&md, "\n```%s\n%s\n```\n", block.Language, block.Code)
		}
	}
	rendered := md.String()

	return &Document{
		ID:      generateDocID(page.URL),
		Source:  sourceName,
		Type:    "astro-docs",
		Title:   page.Title,
		Content: rendered,
		URL:     page.URL,
		Metadata: map[string]interface{}{
			"title":    page.Title,
			"doc_url":  page.URL,
			"doc_type": "astro-docs",
		},
		Hash:      s.generateHash(rendered),
		Timestamp: time.Now(),
	}
}
// sectionToDocument renders section as Markdown (title heading followed by
// its content) and wraps it in a Document of type "astro-section" whose
// title combines the page title and the section title.
func (s *AstroDocsScraper) sectionToDocument(section *astrodocs.Section, page *astrodocs.Page, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# %s\n\n", section.Title)
	fmt.Fprintf(&md, "%s\n", section.Content)
	rendered := md.String()

	return &Document{
		ID:      generateDocID(section.DocURL),
		Source:  sourceName,
		Type:    "astro-section",
		Title:   fmt.Sprintf("%s - %s", page.Title, section.Title),
		Content: rendered,
		URL:     section.DocURL,
		Metadata: map[string]interface{}{
			"page_title": page.Title,
			"section_id": section.ID,
			"doc_url":    section.DocURL,
			"doc_type":   "astro-section",
		},
		Hash:      s.generateHash(rendered),
		Timestamp: time.Now(),
	}
}
+193
View File
@@ -0,0 +1,193 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/cloudflaredocs"
)
// CloudflareDocsScraper fetches a Cloudflare documentation page over HTTP and
// converts it into Document values.
type CloudflareDocsScraper struct {
config *Config // scraper settings; Timeout and UserAgent are read in this file
parser *cloudflaredocs.Parser // parses fetched HTML into a cloudflaredocs.Page
client *http.Client // HTTP client created with config.Timeout
}
// NewCloudflareDocsScraper builds a CloudflareDocsScraper that uses config, a
// new cloudflaredocs parser, and an HTTP client whose timeout is
// config.Timeout. config must not be nil.
func NewCloudflareDocsScraper(config *Config) *CloudflareDocsScraper {
	scraper := &CloudflareDocsScraper{
		config: config,
		parser: cloudflaredocs.NewParser(),
	}
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads source.URL, parses it as a Cloudflare docs page, and
// returns one document for the page, then one per section, then one per API
// entry.
//
// It returns an error when source.URL is empty, when the page cannot be
// fetched, or when it cannot be parsed.
func (s *CloudflareDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Cloudflare docs scraper")
	}

	raw, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	parsed, err := s.parser.ParsePage(raw, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse Cloudflare docs page: %w", err)
	}

	docs := []*Document{s.pageToDocument(parsed, source.Name)}
	for _, sec := range parsed.Sections {
		docs = append(docs, s.sectionToDocument(sec, parsed, source.Name))
	}
	for _, entry := range parsed.APIs {
		docs = append(docs, s.apiToDocument(entry, parsed, source.Name))
	}
	return docs, nil
}
// DetectChanges fetches source.URL and hashes the raw response body. It
// returns whether that hash differs from lastHash, together with the new
// hash. A fetch failure is returned unchanged with an empty hash.
func (s *CloudflareDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	body, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", err
	}

	current := s.generateHash(body)
	return current != lastHash, current, nil
}
// fetchPage issues a GET request for url with the configured User-Agent and
// returns the response body as a string. Any status other than 200 OK is
// reported as an error of the form "HTTP <code>".
func (s *CloudflareDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	request.Header.Set("User-Agent", s.config.UserAgent)

	response, err := s.client.Do(request)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", code)
	}

	payload, err := io.ReadAll(response.Body)
	if err != nil {
		return "", err
	}
	return string(payload), nil
}
// generateHash returns the lowercase hex encoding of the SHA-256 digest of
// content.
func (s *CloudflareDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// pageToDocument renders page as Markdown (title heading, an optional
// "Product:" line, description and, when present, a "Code Examples" section)
// and wraps it in a Document of type "cloudflare-docs". The document hash is
// computed over the rendered Markdown.
func (s *CloudflareDocsScraper) pageToDocument(page *cloudflaredocs.Page, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# %s\n\n", page.Title)
	if page.Product != "" {
		fmt.Fprintf(&md, "Product: %s\n\n", page.Product)
	}
	fmt.Fprintf(&md, "%s\n", page.Description)
	if len(page.CodeBlocks) > 0 {
		md.WriteString("\n## Code Examples\n")
		for _, block := range page.CodeBlocks {
			fmt.Fprintf(&md, "\n```%s\n%s\n```\n", block.Language, block.Code)
		}
	}
	rendered := md.String()

	return &Document{
		ID:      generateDocID(page.URL),
		Source:  sourceName,
		Type:    "cloudflare-docs",
		Title:   page.Title,
		Content: rendered,
		URL:     page.URL,
		Metadata: map[string]interface{}{
			"title":    page.Title,
			"product":  page.Product,
			"doc_url":  page.URL,
			"doc_type": "cloudflare-docs",
		},
		Hash:      s.generateHash(rendered),
		Timestamp: time.Now(),
	}
}
// sectionToDocument renders section as Markdown (title heading followed by
// its content) and wraps it in a Document of type "cloudflare-section" whose
// title combines the page title and the section title.
func (s *CloudflareDocsScraper) sectionToDocument(section *cloudflaredocs.Section, page *cloudflaredocs.Page, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# %s\n\n", section.Title)
	fmt.Fprintf(&md, "%s\n", section.Content)
	rendered := md.String()

	return &Document{
		ID:      generateDocID(section.DocURL),
		Source:  sourceName,
		Type:    "cloudflare-section",
		Title:   fmt.Sprintf("%s - %s", page.Title, section.Title),
		Content: rendered,
		URL:     section.DocURL,
		Metadata: map[string]interface{}{
			"page_title": page.Title,
			"product":    page.Product,
			"section_id": section.ID,
			"doc_url":    section.DocURL,
			"doc_type":   "cloudflare-section",
		},
		Hash:      s.generateHash(rendered),
		Timestamp: time.Now(),
	}
}
// apiToDocument renders api as Markdown (a "METHOD ENDPOINT" heading followed
// by the description) and wraps it in a Document of type "cloudflare-api".
// The document ID is derived from the API's doc URL combined with its
// endpoint.
func (s *CloudflareDocsScraper) apiToDocument(api *cloudflaredocs.API, page *cloudflaredocs.Page, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# %s %s\n\n", api.Method, api.Endpoint)
	fmt.Fprintf(&md, "%s\n", api.Description)
	rendered := md.String()

	return &Document{
		ID:      generateDocID(api.DocURL + "#" + api.Endpoint),
		Source:  sourceName,
		Type:    "cloudflare-api",
		Title:   fmt.Sprintf("%s %s", api.Method, api.Endpoint),
		Content: rendered,
		URL:     api.DocURL,
		Metadata: map[string]interface{}{
			"page_title": page.Title,
			"product":    page.Product,
			"method":     api.Method,
			"endpoint":   api.Endpoint,
			"doc_url":    api.DocURL,
			"doc_type":   "cloudflare-api",
		},
		Hash:      s.generateHash(rendered),
		Timestamp: time.Now(),
	}
}
+156
View File
@@ -0,0 +1,156 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/dockerdocs"
)
// DockerDocsScraper fetches a Docker documentation page over HTTP and
// converts it into Document values.
type DockerDocsScraper struct {
config *Config // scraper settings; Timeout and UserAgent are read in this file
parser *dockerdocs.Parser // parses fetched HTML into a dockerdocs.Page
client *http.Client // HTTP client created with config.Timeout
}
// NewDockerDocsScraper builds a DockerDocsScraper that uses config, a new
// dockerdocs parser, and an HTTP client whose timeout is config.Timeout.
// config must not be nil.
func NewDockerDocsScraper(config *Config) *DockerDocsScraper {
	scraper := &DockerDocsScraper{
		config: config,
		parser: dockerdocs.NewParser(),
	}
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads source.URL, parses it as a Docker docs page, and returns
// one document for the page followed by one document per page section.
//
// It returns an error when source.URL is empty, when the page cannot be
// fetched, or when it cannot be parsed.
func (s *DockerDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Docker docs scraper")
	}

	raw, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	parsed, err := s.parser.ParsePage(raw, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse Docker docs page: %w", err)
	}

	docs := []*Document{s.pageToDocument(parsed, source.Name)}
	for _, sec := range parsed.Sections {
		docs = append(docs, s.sectionToDocument(sec, parsed, source.Name))
	}
	return docs, nil
}
// DetectChanges fetches source.URL and hashes the raw response body. It
// returns whether that hash differs from lastHash, together with the new
// hash. A fetch failure is returned unchanged with an empty hash.
func (s *DockerDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	body, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", err
	}

	current := s.generateHash(body)
	return current != lastHash, current, nil
}
// fetchPage issues a GET request for url with the configured User-Agent and
// returns the response body as a string. Any status other than 200 OK is
// reported as an error of the form "HTTP <code>".
func (s *DockerDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	request.Header.Set("User-Agent", s.config.UserAgent)

	response, err := s.client.Do(request)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", code)
	}

	payload, err := io.ReadAll(response.Body)
	if err != nil {
		return "", err
	}
	return string(payload), nil
}
// generateHash returns the lowercase hex encoding of the SHA-256 digest of
// content.
func (s *DockerDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// pageToDocument renders page as Markdown (title heading, description and,
// when present, a "Code Examples" section with one fenced block per code
// block) and wraps it in a Document of type "docker-docs". The document hash
// is computed over the rendered Markdown.
func (s *DockerDocsScraper) pageToDocument(page *dockerdocs.Page, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# %s\n\n", page.Title)
	fmt.Fprintf(&md, "%s\n", page.Description)
	if len(page.CodeBlocks) > 0 {
		md.WriteString("\n## Code Examples\n")
		for _, block := range page.CodeBlocks {
			fmt.Fprintf(&md, "\n```%s\n%s\n```\n", block.Language, block.Code)
		}
	}
	rendered := md.String()

	return &Document{
		ID:      generateDocID(page.URL),
		Source:  sourceName,
		Type:    "docker-docs",
		Title:   page.Title,
		Content: rendered,
		URL:     page.URL,
		Metadata: map[string]interface{}{
			"title":    page.Title,
			"doc_url":  page.URL,
			"doc_type": "docker-docs",
		},
		Hash:      s.generateHash(rendered),
		Timestamp: time.Now(),
	}
}
// sectionToDocument renders section as Markdown (title heading followed by
// its content) and wraps it in a Document of type "docker-section" whose
// title combines the page title and the section title.
func (s *DockerDocsScraper) sectionToDocument(section *dockerdocs.Section, page *dockerdocs.Page, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# %s\n\n", section.Title)
	fmt.Fprintf(&md, "%s\n", section.Content)
	rendered := md.String()

	return &Document{
		ID:      generateDocID(section.DocURL),
		Source:  sourceName,
		Type:    "docker-section",
		Title:   fmt.Sprintf("%s - %s", page.Title, section.Title),
		Content: rendered,
		URL:     section.DocURL,
		Metadata: map[string]interface{}{
			"page_title": page.Title,
			"section_id": section.ID,
			"doc_url":    section.DocURL,
			"doc_type":   "docker-section",
		},
		Hash:      s.generateHash(rendered),
		Timestamp: time.Now(),
	}
}
+31
View File
@@ -0,0 +1,31 @@
package scraper
import (
"context"
)
// GitHubScraper is intended to scrape documentation from GitHub repositories.
// Its methods are currently unimplemented placeholders.
type GitHubScraper struct {
config *Config // scraper settings; not yet read by any method
}
// NewGitHubScraper returns a GitHubScraper that holds config. config is
// stored as-is and is not validated.
func NewGitHubScraper(config *Config) *GitHubScraper {
return &GitHubScraper{config: config}
}
// Scrape is meant to clone a GitHub repository and parse documents from it.
// It is not implemented yet: it ignores its arguments and always returns a
// nil slice and a nil error, so callers cannot distinguish "no documents"
// from "not implemented".
func (s *GitHubScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
// TODO: Implement GitHub scraping
// 1. Clone repository (shallow)
// 2. Find markdown files in specified paths
// 3. Parse README, docs/, wiki
// 4. Extract code structure
return nil, nil
}
// DetectChanges is meant to report whether the repository has new commits.
// It is not implemented yet: it ignores its arguments and always reports no
// change, with an empty hash and a nil error.
func (s *GitHubScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
// TODO: Check latest commit hash
return false, "", nil
}
+423
View File
@@ -0,0 +1,423 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/godocs"
)
// GoDocsScraper fetches a Go package documentation page over HTTP and
// converts the package and its symbols into Document values.
type GoDocsScraper struct {
config *Config // scraper settings; Timeout and UserAgent are read in this file
parser *godocs.Parser // parses fetched HTML into a godocs.Package
client *http.Client // HTTP client created with config.Timeout
}
// NewGoDocsScraper builds a GoDocsScraper that uses config, a new godocs
// parser, and an HTTP client whose timeout is config.Timeout. config must not
// be nil.
func NewGoDocsScraper(config *Config) *GoDocsScraper {
	scraper := &GoDocsScraper{
		config: config,
		parser: godocs.NewParser(),
	}
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads source.URL, parses it as a Go package page, and returns
// documents in this order: the package itself, each function, each type
// immediately followed by its methods, each constant, and each variable.
//
// It returns an error when source.URL is empty, when the page cannot be
// fetched, or when it cannot be parsed.
func (s *GoDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Go docs scraper")
	}

	raw, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	pkg, err := s.parser.ParsePackagePage(raw, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse package: %w", err)
	}

	name := source.Name
	docs := []*Document{s.packageToDocument(pkg, name)}

	for _, fn := range pkg.Functions {
		docs = append(docs, s.functionToDocument(fn, pkg, name))
	}
	for _, typ := range pkg.Types {
		docs = append(docs, s.typeToDocument(typ, pkg, name))
		for _, method := range typ.Methods {
			docs = append(docs, s.methodToDocument(method, typ, pkg, name))
		}
	}
	for _, constant := range pkg.Constants {
		docs = append(docs, s.constantToDocument(constant, pkg, name))
	}
	for _, variable := range pkg.Variables {
		docs = append(docs, s.variableToDocument(variable, pkg, name))
	}

	return docs, nil
}
// DetectChanges fetches source.URL and hashes the raw response body. It
// returns whether that hash differs from lastHash, together with the new
// hash. A fetch failure is returned unchanged with an empty hash.
func (s *GoDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	body, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", err
	}

	current := s.generateHash(body)
	return current != lastHash, current, nil
}
// fetchPage issues a GET request for url with the configured User-Agent and
// returns the response body as a string. Any status other than 200 OK is
// reported as an error of the form "HTTP <code>".
func (s *GoDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	request.Header.Set("User-Agent", s.config.UserAgent)

	response, err := s.client.Do(request)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", code)
	}

	payload, err := io.ReadAll(response.Body)
	if err != nil {
		return "", err
	}
	return string(payload), nil
}
// generateHash returns the lowercase hex encoding of the SHA-256 digest of
// content.
func (s *GoDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// packageToDocument wraps the package overview produced by
// buildPackageContent in a Document of type "go-package". Module path and
// version are added to the metadata when pkg.Module is set, and license
// names when the package lists any licenses.
func (s *GoDocsScraper) packageToDocument(pkg *godocs.Package, sourceName string) *Document {
	body := s.buildPackageContent(pkg)

	meta := map[string]interface{}{
		"import_path": pkg.ImportPath,
		"version":     pkg.Version,
		"imported_by": pkg.ImportedBy,
		"repository":  pkg.Repository,
		"doc_url":     pkg.DocURL,
	}
	if mod := pkg.Module; mod != nil {
		meta["module_path"] = mod.Path
		meta["module_version"] = mod.Version
	}
	if count := len(pkg.Licenses); count > 0 {
		names := make([]string, 0, count)
		for _, lic := range pkg.Licenses {
			names = append(names, lic.Name)
		}
		meta["licenses"] = names
	}

	return &Document{
		ID:        generateDocID(pkg.DocURL),
		Source:    sourceName,
		Type:      "go-package",
		Title:     fmt.Sprintf("%s - %s", pkg.Name, pkg.ImportPath),
		Content:   body,
		URL:       pkg.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// buildPackageContent renders a Markdown overview of pkg: a heading with the
// import path, the synopsis and documentation when present, the signatures
// of its functions, the names and kinds of its types, and count-only
// headings for constants and variables. The pieces are joined with newlines.
func (s *GoDocsScraper) buildPackageContent(pkg *godocs.Package) string {
	lines := []string{fmt.Sprintf("# Package %s\n", pkg.ImportPath)}

	if pkg.Synopsis != "" {
		lines = append(lines, pkg.Synopsis)
	}
	if pkg.Doc != "" {
		lines = append(lines, "\n## Documentation\n", pkg.Doc)
	}

	if count := len(pkg.Functions); count > 0 {
		lines = append(lines, fmt.Sprintf("\n## Functions (%d)\n", count))
		for _, fn := range pkg.Functions {
			lines = append(lines, fmt.Sprintf("- `%s`", fn.Signature))
		}
	}
	if count := len(pkg.Types); count > 0 {
		lines = append(lines, fmt.Sprintf("\n## Types (%d)\n", count))
		for _, typ := range pkg.Types {
			lines = append(lines, fmt.Sprintf("- `%s` (%s)", typ.Name, typ.Kind))
		}
	}
	if count := len(pkg.Constants); count > 0 {
		lines = append(lines, fmt.Sprintf("\n## Constants (%d)\n", count))
	}
	if count := len(pkg.Variables); count > 0 {
		lines = append(lines, fmt.Sprintf("\n## Variables (%d)\n", count))
	}

	return strings.Join(lines, "\n")
}
// functionToDocument wraps the Markdown produced by buildFunctionContent in a
// Document of type "go-function". The function's examples are stored in the
// metadata as a JSON string, and the document URL is the package doc URL with
// the function name as fragment.
func (s *GoDocsScraper) functionToDocument(fn *godocs.Function, pkg *godocs.Package, sourceName string) *Document {
	body := s.buildFunctionContent(fn, pkg)

	// A marshalling failure is ignored; the metadata then holds an empty string.
	encodedExamples, _ := json.Marshal(fn.Examples)
	anchor := fmt.Sprintf("%s#%s", pkg.DocURL, fn.Name)

	return &Document{
		ID:      generateDocID(anchor),
		Source:  sourceName,
		Type:    "go-function",
		Title:   fmt.Sprintf("%s.%s", pkg.Name, fn.Name),
		Content: body,
		URL:     anchor,
		Metadata: map[string]interface{}{
			"import_path": pkg.ImportPath,
			"package":     pkg.Name,
			"symbol":      fn.Name,
			"signature":   fn.Signature,
			"kind":        "function",
			"examples":    string(encodedExamples),
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// buildFunctionContent renders fn as Markdown: a "pkg.Func" heading, the
// signature in a fenced block, the doc text when present, and one subsection
// per example with its doc, code and (when present) output. The pieces are
// joined with newlines.
func (s *GoDocsScraper) buildFunctionContent(fn *godocs.Function, pkg *godocs.Package) string {
	lines := []string{
		fmt.Sprintf("# %s.%s\n", pkg.Name, fn.Name),
		fmt.Sprintf("```\n%s\n```", fn.Signature),
	}
	if fn.Doc != "" {
		lines = append(lines, "\n"+fn.Doc)
	}

	for _, example := range fn.Examples {
		lines = append(lines, fmt.Sprintf("\n### Example: %s\n", example.Name))
		if example.Doc != "" {
			lines = append(lines, example.Doc)
		}
		lines = append(lines, fmt.Sprintf("```go\n%s\n```", example.Code))
		if example.Output != "" {
			lines = append(lines, fmt.Sprintf("Output:\n```\n%s\n```", example.Output))
		}
	}

	return strings.Join(lines, "\n")
}
// typeToDocument wraps the Markdown produced by buildTypeContent in a
// Document of type "go-type". The type's fields are stored in the metadata as
// a JSON string, and the document URL is the package doc URL with the type
// name as fragment.
func (s *GoDocsScraper) typeToDocument(t *godocs.Type, pkg *godocs.Package, sourceName string) *Document {
	body := s.buildTypeContent(t, pkg)

	// A marshalling failure is ignored; the metadata then holds an empty string.
	encodedFields, _ := json.Marshal(t.Fields)
	anchor := fmt.Sprintf("%s#%s", pkg.DocURL, t.Name)

	return &Document{
		ID:      generateDocID(anchor),
		Source:  sourceName,
		Type:    "go-type",
		Title:   fmt.Sprintf("%s.%s", pkg.Name, t.Name),
		Content: body,
		URL:     anchor,
		Metadata: map[string]interface{}{
			"import_path":  pkg.ImportPath,
			"package":      pkg.Name,
			"symbol":       t.Name,
			"kind":         "type",
			"type_kind":    t.Kind,
			"underlying":   t.Underlying,
			"method_count": len(t.Methods),
			"fields":       string(encodedFields),
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// buildTypeContent renders t as Markdown: a "type pkg.Type" heading, the
// underlying declaration in a fenced block, the doc text when present, a
// "Fields" list (with per-field docs where available) and a "Methods" list
// of signatures. The pieces are joined with newlines.
func (s *GoDocsScraper) buildTypeContent(t *godocs.Type, pkg *godocs.Package) string {
	lines := []string{
		fmt.Sprintf("# type %s.%s\n", pkg.Name, t.Name),
		fmt.Sprintf("```\n%s\n```", t.Underlying),
	}
	if t.Doc != "" {
		lines = append(lines, "\n"+t.Doc)
	}

	if len(t.Fields) > 0 {
		lines = append(lines, "\n### Fields\n")
		for _, field := range t.Fields {
			entry := fmt.Sprintf("- `%s %s`", field.Name, field.Type)
			if field.Doc != "" {
				entry = fmt.Sprintf("- `%s %s` - %s", field.Name, field.Type, field.Doc)
			}
			lines = append(lines, entry)
		}
	}

	if count := len(t.Methods); count > 0 {
		lines = append(lines, fmt.Sprintf("\n### Methods (%d)\n", count))
		for _, method := range t.Methods {
			lines = append(lines, fmt.Sprintf("- `%s`", method.Signature))
		}
	}

	return strings.Join(lines, "\n")
}
// methodToDocument wraps the Markdown produced by buildMethodContent in a
// Document of type "go-method". The document URL is the package doc URL with
// "Type.Method" as fragment.
func (s *GoDocsScraper) methodToDocument(m *godocs.Method, t *godocs.Type, pkg *godocs.Package, sourceName string) *Document {
	body := s.buildMethodContent(m, t, pkg)
	anchor := fmt.Sprintf("%s#%s.%s", pkg.DocURL, t.Name, m.Name)

	return &Document{
		ID:      generateDocID(anchor),
		Source:  sourceName,
		Type:    "go-method",
		Title:   fmt.Sprintf("%s.%s.%s", pkg.Name, t.Name, m.Name),
		Content: body,
		URL:     anchor,
		Metadata: map[string]interface{}{
			"import_path": pkg.ImportPath,
			"package":     pkg.Name,
			"type":        t.Name,
			"symbol":      m.Name,
			"receiver":    m.Receiver,
			"signature":   m.Signature,
			"kind":        "method",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// buildMethodContent renders m as Markdown: a "func (receiver) Name" heading,
// the signature in a fenced block, and the doc text when present. The t and
// pkg arguments are accepted but not used.
func (s *GoDocsScraper) buildMethodContent(m *godocs.Method, t *godocs.Type, pkg *godocs.Package) string {
	lines := []string{
		fmt.Sprintf("# func (%s) %s\n", m.Receiver, m.Name),
		fmt.Sprintf("```\n%s\n```", m.Signature),
	}
	if m.Doc != "" {
		lines = append(lines, "\n"+m.Doc)
	}
	return strings.Join(lines, "\n")
}
// constantToDocument renders a constant declaration as Markdown and wraps it
// in a Document of type "go-constant". A declaration with more than one name
// is shown as a const group listing the names only; otherwise it is shown as
// a single "const Name = Value" line. The document URL is the package doc
// URL.
func (s *GoDocsScraper) constantToDocument(c *godocs.Value, pkg *godocs.Package, sourceName string) *Document {
	var md strings.Builder
	md.WriteString("# Constants\n\n")
	if c.Doc != "" {
		fmt.Fprintf(&md, "%s\n\n", c.Doc)
	}

	if len(c.Names) <= 1 {
		fmt.Fprintf(&md, "```go\nconst %s = %s\n```", c.Name, c.Value)
	} else {
		md.WriteString("```go\nconst (\n")
		for _, name := range c.Names {
			fmt.Fprintf(&md, "\t%s\n", name)
		}
		md.WriteString(")\n```")
	}
	rendered := md.String()

	return &Document{
		ID:      generateDocID(fmt.Sprintf("%s#const-%s", pkg.DocURL, c.Name)),
		Source:  sourceName,
		Type:    "go-constant",
		Title:   fmt.Sprintf("%s.%s (const)", pkg.Name, c.Name),
		Content: rendered,
		URL:     pkg.DocURL,
		Metadata: map[string]interface{}{
			"import_path": pkg.ImportPath,
			"package":     pkg.Name,
			"names":       c.Names,
			"kind":        "constant",
		},
		Hash:      s.generateHash(rendered),
		Timestamp: time.Now(),
	}
}
// variableToDocument renders a variable declaration as Markdown ("var Name",
// followed by the type and initial value when they are non-empty) and wraps
// it in a Document of type "go-variable". The document URL is the package
// doc URL.
func (s *GoDocsScraper) variableToDocument(v *godocs.Value, pkg *godocs.Package, sourceName string) *Document {
	var md strings.Builder
	md.WriteString("# Variables\n\n")
	if v.Doc != "" {
		fmt.Fprintf(&md, "%s\n\n", v.Doc)
	}

	fmt.Fprintf(&md, "```go\nvar %s", v.Name)
	if v.Type != "" {
		fmt.Fprintf(&md, " %s", v.Type)
	}
	if v.Value != "" {
		fmt.Fprintf(&md, " = %s", v.Value)
	}
	md.WriteString("\n```")
	rendered := md.String()

	return &Document{
		ID:      generateDocID(fmt.Sprintf("%s#var-%s", pkg.DocURL, v.Name)),
		Source:  sourceName,
		Type:    "go-variable",
		Title:   fmt.Sprintf("%s.%s (var)", pkg.Name, v.Name),
		Content: rendered,
		URL:     pkg.DocURL,
		Metadata: map[string]interface{}{
			"import_path": pkg.ImportPath,
			"package":     pkg.Name,
			"name":        v.Name,
			"type":        v.Type,
			"kind":        "variable",
		},
		Hash:      s.generateHash(rendered),
		Timestamp: time.Now(),
	}
}
+254
View File
@@ -0,0 +1,254 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/javadocs"
)
// JavaDocsScraper fetches a Java package documentation page over HTTP and
// converts the package and its members into Document values.
type JavaDocsScraper struct {
config *Config // scraper settings; Timeout and UserAgent are read in this file
parser *javadocs.Parser // parses fetched HTML into a javadocs.Package
client *http.Client // HTTP client created with config.Timeout
}
// NewJavaDocsScraper builds a JavaDocsScraper that uses config, a new
// javadocs parser, and an HTTP client whose timeout is config.Timeout. config
// must not be nil.
func NewJavaDocsScraper(config *Config) *JavaDocsScraper {
	scraper := &JavaDocsScraper{
		config: config,
		parser: javadocs.NewParser(),
	}
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads source.URL, parses it as a Java package page, and returns
// documents in this order: the package itself, then its classes, interfaces,
// enums and exceptions.
//
// It returns an error when source.URL is empty, when the page cannot be
// fetched, or when it cannot be parsed.
func (s *JavaDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Java docs scraper")
	}

	raw, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	pkg, err := s.parser.ParsePackagePage(raw, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse package: %w", err)
	}

	name := source.Name
	docs := []*Document{s.packageToDocument(pkg, name)}

	for _, cls := range pkg.Classes {
		docs = append(docs, s.classToDocument(cls, pkg, name))
	}
	for _, iface := range pkg.Interfaces {
		docs = append(docs, s.interfaceToDocument(iface, pkg, name))
	}
	for _, enum := range pkg.Enums {
		docs = append(docs, s.enumToDocument(enum, pkg, name))
	}
	for _, exc := range pkg.Exceptions {
		docs = append(docs, s.exceptionToDocument(exc, pkg, name))
	}

	return docs, nil
}
// DetectChanges re-fetches source.URL, hashes the raw page with generateHash,
// and reports whether that hash differs from lastHash. The freshly computed
// hash is returned so the caller can store it for the next comparison.
//
// As in Scrape, an empty source.URL is rejected up front and fetch failures
// are wrapped, so callers get the same diagnostics from both entry points
// instead of an opaque transport error for a missing URL.
func (s *JavaDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	if source.URL == "" {
		return false, "", fmt.Errorf("URL is required for Java docs scraper")
	}

	html, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", fmt.Errorf("failed to fetch page: %w", err)
	}

	hash := s.generateHash(html)
	return hash != lastHash, hash, nil
}
// fetchPage issues a GET for url using the configured User-Agent and returns
// the response body as a string. Any status other than 200 OK is reported as
// an "HTTP <code>" error; request, transport, and read errors are returned
// unchanged.
func (s *JavaDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	request.Header.Set("User-Agent", s.config.UserAgent)

	response, err := s.client.Do(request)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", code)
	}

	raw, err := io.ReadAll(response.Body)
	if err != nil {
		return "", err
	}
	return string(raw), nil
}
// generateHash returns the lowercase hex encoding of the SHA-256 digest of
// content.
func (s *JavaDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// packageToDocument renders pkg as a "java-package" Document whose content is
// a "# Package <name>" heading followed by the package documentation. The
// Document's hash is computed over that rendered content.
func (s *JavaDocsScraper) packageToDocument(pkg *javadocs.Package, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# Package %s\n\n%s\n", pkg.Name, pkg.Doc)
	body := sb.String()

	meta := map[string]interface{}{
		"package":  pkg.Name,
		"doc_url":  pkg.DocURL,
		"doc_type": "java-package",
	}
	return &Document{
		ID:        generateDocID(pkg.DocURL),
		Source:    sourceName,
		Type:      "java-package",
		Title:     pkg.Name,
		Content:   body,
		URL:       pkg.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// classToDocument renders class as a "java-class" Document. The content holds
// the qualified name, the kind, the class documentation and, when the class
// has methods, a bulleted list of their signatures.
func (s *JavaDocsScraper) classToDocument(class *javadocs.Class, pkg *javadocs.Package, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\nKind: %s\n\n%s\n", class.QualifiedName, class.Kind, class.Doc)
	if len(class.Methods) != 0 {
		sb.WriteString("\n## Methods\n")
		for _, method := range class.Methods {
			fmt.Fprintf(&sb, "- `%s`\n", method.Signature)
		}
	}
	body := sb.String()

	meta := map[string]interface{}{
		"package":        pkg.Name,
		"qualified_name": class.QualifiedName,
		"kind":           string(class.Kind),
		"doc_url":        class.DocURL,
	}
	return &Document{
		ID:        generateDocID(class.DocURL),
		Source:    sourceName,
		Type:      "java-class",
		Title:     class.QualifiedName,
		Content:   body,
		URL:       class.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// interfaceToDocument renders iface as a "java-interface" Document whose
// content is the qualified name tagged "(interface)" followed by the
// interface documentation.
func (s *JavaDocsScraper) interfaceToDocument(iface *javadocs.Class, pkg *javadocs.Package, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s (interface)\n\n%s\n", iface.QualifiedName, iface.Doc)
	body := sb.String()

	meta := map[string]interface{}{
		"package":        pkg.Name,
		"qualified_name": iface.QualifiedName,
		"kind":           "interface",
		"doc_url":        iface.DocURL,
	}
	return &Document{
		ID:        generateDocID(iface.DocURL),
		Source:    sourceName,
		Type:      "java-interface",
		Title:     iface.QualifiedName,
		Content:   body,
		URL:       iface.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// enumToDocument renders enum as a "java-enum" Document. The content holds
// the qualified name tagged "(enum)", the enum documentation and, when
// constants are present, a bulleted list of their names.
func (s *JavaDocsScraper) enumToDocument(enum *javadocs.Enum, pkg *javadocs.Package, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s (enum)\n\n%s\n", enum.QualifiedName, enum.Doc)
	if len(enum.Constants) != 0 {
		sb.WriteString("\n## Constants\n")
		for _, constant := range enum.Constants {
			fmt.Fprintf(&sb, "- `%s`\n", constant.Name)
		}
	}
	body := sb.String()

	meta := map[string]interface{}{
		"package":        pkg.Name,
		"qualified_name": enum.QualifiedName,
		"kind":           "enum",
		"doc_url":        enum.DocURL,
	}
	return &Document{
		ID:        generateDocID(enum.DocURL),
		Source:    sourceName,
		Type:      "java-enum",
		Title:     enum.QualifiedName,
		Content:   body,
		URL:       enum.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// exceptionToDocument renders exc as a "java-exception" Document whose
// content is the qualified name tagged "(exception)" followed by the
// exception documentation.
func (s *JavaDocsScraper) exceptionToDocument(exc *javadocs.Class, pkg *javadocs.Package, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s (exception)\n\n%s\n", exc.QualifiedName, exc.Doc)
	body := sb.String()

	meta := map[string]interface{}{
		"package":        pkg.Name,
		"qualified_name": exc.QualifiedName,
		"kind":           "exception",
		"doc_url":        exc.DocURL,
	}
	return &Document{
		ID:        generateDocID(exc.DocURL),
		Source:    sourceName,
		Type:      "java-exception",
		Title:     exc.QualifiedName,
		Content:   body,
		URL:       exc.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
+31
View File
@@ -0,0 +1,31 @@
package scraper
import (
"context"
)
// LocalScraper is the scraper for documentation stored on the local
// filesystem. Its Scrape and DetectChanges methods are still TODO stubs.
type LocalScraper struct {
// config is the scraper configuration passed to NewLocalScraper.
config *Config
}
// NewLocalScraper returns a LocalScraper that holds the given config.
func NewLocalScraper(config *Config) *LocalScraper {
	scraper := new(LocalScraper)
	scraper.config = config
	return scraper
}
// Scrape is meant to scan and parse documents from a local directory.
//
// It is not implemented yet: ctx and source are ignored and the method
// always returns (nil, nil), so callers see an empty, error-free result.
func (s *LocalScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
// TODO: Implement local scraping
// 1. Walk directory tree
// 2. Filter by include/exclude patterns
// 3. Parse markdown, text, code files
// 4. Extract structure and content
return nil, nil
}
// DetectChanges is meant to report whether files have been modified.
//
// It is not implemented yet: all arguments are ignored and the method always
// returns (false, "", nil), i.e. "unchanged" with an empty hash.
func (s *LocalScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
// TODO: Check file modification times
return false, "", nil
}
+222
View File
@@ -0,0 +1,222 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/mcpdocs"
)
// MCPDocsScraper fetches an MCP server documentation page over HTTP and turns
// the parsed server, tools, resources, and prompts into Documents.
type MCPDocsScraper struct {
// config supplies the User-Agent sent with every request.
config *Config
// parser converts the fetched HTML into an mcpdocs.Server.
parser *mcpdocs.Parser
// client performs the HTTP requests; its timeout is taken from config.
client *http.Client
}
// NewMCPDocsScraper returns an MCPDocsScraper bound to config, with a fresh
// mcpdocs parser and an HTTP client whose Timeout is config.Timeout.
func NewMCPDocsScraper(config *Config) *MCPDocsScraper {
	scraper := &MCPDocsScraper{config: config, parser: mcpdocs.NewParser()}
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads the server page at source.URL, parses it, and returns one
// Document for the server followed by one Document per tool, resource, and
// prompt reported by the parser, in that order.
//
// It returns an error when source.URL is empty, when the page cannot be
// fetched, or when the parser rejects the page.
func (s *MCPDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for MCP docs scraper")
	}

	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	server, err := s.parser.ParseServerPage(page, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse MCP server page: %w", err)
	}

	// The server summary always comes first; its members follow by kind.
	docs := []*Document{s.serverToDocument(server, source.Name)}
	for _, t := range server.Tools {
		docs = append(docs, s.toolToDocument(t, server, source.Name))
	}
	for _, r := range server.Resources {
		docs = append(docs, s.resourceToDocument(r, server, source.Name))
	}
	for _, p := range server.Prompts {
		docs = append(docs, s.promptToDocument(p, server, source.Name))
	}
	return docs, nil
}
// DetectChanges re-fetches source.URL, hashes the raw page with generateHash,
// and reports whether that hash differs from lastHash. The freshly computed
// hash is returned so the caller can store it for the next comparison.
//
// As in Scrape, an empty source.URL is rejected up front and fetch failures
// are wrapped, so callers get the same diagnostics from both entry points
// instead of an opaque transport error for a missing URL.
func (s *MCPDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	if source.URL == "" {
		return false, "", fmt.Errorf("URL is required for MCP docs scraper")
	}

	html, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", fmt.Errorf("failed to fetch page: %w", err)
	}

	hash := s.generateHash(html)
	return hash != lastHash, hash, nil
}
// fetchPage issues a GET for url using the configured User-Agent and returns
// the response body as a string. Any status other than 200 OK is reported as
// an "HTTP <code>" error; request, transport, and read errors are returned
// unchanged.
func (s *MCPDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	request.Header.Set("User-Agent", s.config.UserAgent)

	response, err := s.client.Do(request)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", code)
	}

	raw, err := io.ReadAll(response.Body)
	if err != nil {
		return "", err
	}
	return string(raw), nil
}
// generateHash returns the lowercase hex encoding of the SHA-256 digest of
// content.
func (s *MCPDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// serverToDocument renders server as an "mcp-server" Document. The content
// holds the server name, its description and, when tools are present, a
// counted bulleted list of tool names with their descriptions.
func (s *MCPDocsScraper) serverToDocument(server *mcpdocs.Server, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\n%s\n", server.Name, server.Description)
	if count := len(server.Tools); count > 0 {
		fmt.Fprintf(&sb, "\n## Tools (%d)\n", count)
		for _, tool := range server.Tools {
			fmt.Fprintf(&sb, "- `%s`: %s\n", tool.Name, tool.Description)
		}
	}
	body := sb.String()

	meta := map[string]interface{}{
		"server":   server.Name,
		"category": server.Category,
		"doc_url":  server.DocURL,
		"doc_type": "mcp-server",
	}
	return &Document{
		ID:        generateDocID(server.DocURL),
		Source:    sourceName,
		Type:      "mcp-server",
		Title:     server.Name,
		Content:   body,
		URL:       server.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// toolToDocument renders tool as an "mcp-tool" Document. The content holds
// the tool name, the owning server name, and the tool description. The
// Document ID is derived from the tool's DocURL plus "#" and the tool name.
func (s *MCPDocsScraper) toolToDocument(tool *mcpdocs.Tool, server *mcpdocs.Server, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\nServer: %s\n\n%s\n", tool.Name, server.Name, tool.Description)
	body := sb.String()

	meta := map[string]interface{}{
		"server":   server.Name,
		"tool":     tool.Name,
		"doc_url":  tool.DocURL,
		"doc_type": "mcp-tool",
	}
	return &Document{
		ID:        generateDocID(tool.DocURL + "#" + tool.Name),
		Source:    sourceName,
		Type:      "mcp-tool",
		Title:     fmt.Sprintf("%s.%s", server.Name, tool.Name),
		Content:   body,
		URL:       tool.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// resourceToDocument renders res as an "mcp-resource" Document. The content
// holds the resource name, the owning server name, the resource URI, and the
// resource description. The Document ID is derived from the resource's DocURL
// plus "#" and the resource name.
func (s *MCPDocsScraper) resourceToDocument(res *mcpdocs.Resource, server *mcpdocs.Server, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\nServer: %s\nURI: %s\n\n%s\n", res.Name, server.Name, res.URI, res.Description)
	body := sb.String()

	meta := map[string]interface{}{
		"server":   server.Name,
		"resource": res.Name,
		"uri":      res.URI,
		"doc_url":  res.DocURL,
		"doc_type": "mcp-resource",
	}
	return &Document{
		ID:        generateDocID(res.DocURL + "#" + res.Name),
		Source:    sourceName,
		Type:      "mcp-resource",
		Title:     res.Name,
		Content:   body,
		URL:       res.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// promptToDocument renders prompt as an "mcp-prompt" Document. The content
// holds the prompt name, the owning server name, and the prompt description.
// The Document ID is derived from the prompt's DocURL plus "#" and the prompt
// name.
func (s *MCPDocsScraper) promptToDocument(prompt *mcpdocs.Prompt, server *mcpdocs.Server, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\nServer: %s\n\n%s\n", prompt.Name, server.Name, prompt.Description)
	body := sb.String()

	meta := map[string]interface{}{
		"server":   server.Name,
		"prompt":   prompt.Name,
		"doc_url":  prompt.DocURL,
		"doc_type": "mcp-prompt",
	}
	return &Document{
		ID:        generateDocID(prompt.DocURL + "#" + prompt.Name),
		Source:    sourceName,
		Type:      "mcp-prompt",
		Title:     prompt.Name,
		Content:   body,
		URL:       prompt.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
+298
View File
@@ -0,0 +1,298 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/nuxtdocs"
)
// NuxtDocsScraper fetches a Nuxt API reference page over HTTP and turns the
// parsed components, composables, utilities, configs, and commands into
// Documents.
type NuxtDocsScraper struct {
// config supplies the User-Agent sent with every request.
config *Config
// parser converts the fetched HTML into a nuxtdocs.Reference.
parser *nuxtdocs.Parser
// client performs the HTTP requests; its timeout is taken from config.
client *http.Client
}
// NewNuxtDocsScraper returns a NuxtDocsScraper bound to config, with a fresh
// nuxtdocs parser and an HTTP client whose Timeout is config.Timeout.
func NewNuxtDocsScraper(config *Config) *NuxtDocsScraper {
	scraper := &NuxtDocsScraper{config: config, parser: nuxtdocs.NewParser()}
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads the reference page at source.URL, parses it, and returns
// one summary Document followed by one Document per component, composable,
// utility, config entry, and command reported by the parser, in that order.
//
// It returns an error when source.URL is empty, when the page cannot be
// fetched, or when the parser rejects the page.
func (s *NuxtDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Nuxt docs scraper")
	}

	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	ref, err := s.parser.ParseReferencePage(page, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse reference: %w", err)
	}

	// The reference summary always comes first; entries follow by kind.
	docs := []*Document{s.referenceToDocument(ref, source.Name)}
	for _, component := range ref.Components {
		docs = append(docs, s.componentToDocument(component, source.Name))
	}
	for _, composable := range ref.Composables {
		docs = append(docs, s.composableToDocument(composable, source.Name))
	}
	for _, utility := range ref.Utilities {
		docs = append(docs, s.utilityToDocument(utility, source.Name))
	}
	for _, option := range ref.Configs {
		docs = append(docs, s.configToDocument(option, source.Name))
	}
	for _, command := range ref.Commands {
		docs = append(docs, s.commandToDocument(command, source.Name))
	}
	return docs, nil
}
// DetectChanges re-fetches source.URL, hashes the raw page with generateHash,
// and reports whether that hash differs from lastHash. The freshly computed
// hash is returned so the caller can store it for the next comparison.
//
// As in Scrape, an empty source.URL is rejected up front and fetch failures
// are wrapped, so callers get the same diagnostics from both entry points
// instead of an opaque transport error for a missing URL.
func (s *NuxtDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	if source.URL == "" {
		return false, "", fmt.Errorf("URL is required for Nuxt docs scraper")
	}

	html, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", fmt.Errorf("failed to fetch page: %w", err)
	}

	hash := s.generateHash(html)
	return hash != lastHash, hash, nil
}
// fetchPage issues a GET for url using the configured User-Agent and returns
// the response body as a string. Any status other than 200 OK is reported as
// an "HTTP <code>" error; request, transport, and read errors are returned
// unchanged.
func (s *NuxtDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	request.Header.Set("User-Agent", s.config.UserAgent)

	response, err := s.client.Do(request)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", code)
	}

	raw, err := io.ReadAll(response.Body)
	if err != nil {
		return "", err
	}
	return string(raw), nil
}
// generateHash returns the lowercase hex encoding of the SHA-256 digest of
// content.
func (s *NuxtDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// referenceToDocument renders ref as the "nuxt-reference" summary Document:
// a fixed heading followed by the number of components, composables,
// utilities, configs, and commands in ref.
func (s *NuxtDocsScraper) referenceToDocument(ref *nuxtdocs.Reference, sourceName string) *Document {
	var sb strings.Builder
	sb.WriteString("# Nuxt API Reference\n\n")
	fmt.Fprintf(
		&sb,
		"Components: %d, Composables: %d, Utilities: %d, Configs: %d, Commands: %d\n",
		len(ref.Components), len(ref.Composables), len(ref.Utilities), len(ref.Configs), len(ref.Commands),
	)
	body := sb.String()

	meta := map[string]interface{}{"doc_type": "nuxt-reference"}
	return &Document{
		ID:        generateDocID(ref.DocURL),
		Source:    sourceName,
		Type:      "nuxt-reference",
		Title:     "Nuxt API Reference",
		Content:   body,
		URL:       ref.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// componentToDocument renders comp as a "nuxt-component" Document. The
// content holds the component name written as a self-closing tag, the
// component documentation and, when props are present, a bulleted
// "name: type" list.
func (s *NuxtDocsScraper) componentToDocument(comp *nuxtdocs.Component, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# <%s />\n\n%s\n", comp.Name, comp.Doc)
	if len(comp.Props) != 0 {
		sb.WriteString("\n## Props\n")
		for _, prop := range comp.Props {
			fmt.Fprintf(&sb, "- `%s: %s`\n", prop.Name, prop.Type)
		}
	}
	body := sb.String()

	meta := map[string]interface{}{
		"name":     comp.Name,
		"category": comp.Category,
		"doc_url":  comp.DocURL,
		"doc_type": "nuxt-component",
	}
	return &Document{
		ID:        generateDocID(comp.DocURL),
		Source:    sourceName,
		Type:      "nuxt-component",
		Title:     comp.Name,
		Content:   body,
		URL:       comp.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// composableToDocument renders comp as a "nuxt-composable" Document. The
// content holds the name, an optional fenced javascript signature, the
// documentation, and an optional "Returns" line.
func (s *NuxtDocsScraper) composableToDocument(comp *nuxtdocs.Composable, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\n", comp.Name)
	if comp.Signature != "" {
		fmt.Fprintf(&sb, "```javascript\n%s\n```\n\n", comp.Signature)
	}
	fmt.Fprintf(&sb, "%s\n", comp.Doc)
	if comp.Returns != "" {
		fmt.Fprintf(&sb, "\n**Returns:** `%s`\n", comp.Returns)
	}
	body := sb.String()

	meta := map[string]interface{}{
		"name":     comp.Name,
		"category": comp.Category,
		"doc_url":  comp.DocURL,
		"doc_type": "nuxt-composable",
	}
	return &Document{
		ID:        generateDocID(comp.DocURL),
		Source:    sourceName,
		Type:      "nuxt-composable",
		Title:     comp.Name,
		Content:   body,
		URL:       comp.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// utilityToDocument renders util as a "nuxt-utility" Document. The content
// holds the name, an optional fenced javascript signature, and the
// documentation.
func (s *NuxtDocsScraper) utilityToDocument(util *nuxtdocs.Utility, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\n", util.Name)
	if util.Signature != "" {
		fmt.Fprintf(&sb, "```javascript\n%s\n```\n\n", util.Signature)
	}
	fmt.Fprintf(&sb, "%s\n", util.Doc)
	body := sb.String()

	meta := map[string]interface{}{
		"name":     util.Name,
		"doc_url":  util.DocURL,
		"doc_type": "nuxt-utility",
	}
	return &Document{
		ID:        generateDocID(util.DocURL),
		Source:    sourceName,
		Type:      "nuxt-utility",
		Title:     util.Name,
		Content:   body,
		URL:       util.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// configToDocument renders cfg as a "nuxt-config" Document. The content
// holds the option name, optional "Type" and "Default" lines (each emitted
// only when non-empty), and the documentation.
func (s *NuxtDocsScraper) configToDocument(cfg *nuxtdocs.Config, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\n", cfg.Name)
	if cfg.Type != "" {
		fmt.Fprintf(&sb, "Type: `%s`\n\n", cfg.Type)
	}
	if cfg.Default != "" {
		fmt.Fprintf(&sb, "Default: `%s`\n\n", cfg.Default)
	}
	fmt.Fprintf(&sb, "%s\n", cfg.Doc)
	body := sb.String()

	meta := map[string]interface{}{
		"name":     cfg.Name,
		"type":     cfg.Type,
		"default":  cfg.Default,
		"category": cfg.Category,
		"doc_url":  cfg.DocURL,
		"doc_type": "nuxt-config",
	}
	return &Document{
		ID:        generateDocID(cfg.DocURL),
		Source:    sourceName,
		Type:      "nuxt-config",
		Title:     cfg.Name,
		Content:   body,
		URL:       cfg.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// commandToDocument renders cmd as a "nuxt-command" Document. The content
// holds the command name, an optional fenced usage block, the documentation
// and, when flags are present, a bulleted "--flag: doc" list.
func (s *NuxtDocsScraper) commandToDocument(cmd *nuxtdocs.Command, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s\n\n", cmd.Name)
	if cmd.Usage != "" {
		fmt.Fprintf(&sb, "```\n%s\n```\n\n", cmd.Usage)
	}
	fmt.Fprintf(&sb, "%s\n", cmd.Doc)
	if len(cmd.Flags) != 0 {
		sb.WriteString("\n## Flags\n")
		for _, flag := range cmd.Flags {
			fmt.Fprintf(&sb, "- `--%s`: %s\n", flag.Name, flag.Doc)
		}
	}
	body := sb.String()

	meta := map[string]interface{}{
		"name":     cmd.Name,
		"usage":    cmd.Usage,
		"doc_url":  cmd.DocURL,
		"doc_type": "nuxt-command",
	}
	return &Document{
		ID:        generateDocID(cmd.DocURL),
		Source:    sourceName,
		Type:      "nuxt-command",
		Title:     cmd.Name,
		Content:   body,
		URL:       cmd.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
+31
View File
@@ -0,0 +1,31 @@
package scraper
import (
"context"
)
// OpenAPIScraper is the scraper for OpenAPI/Swagger specifications. Its
// Scrape and DetectChanges methods are still TODO stubs.
type OpenAPIScraper struct {
// config is the scraper configuration passed to NewOpenAPIScraper.
config *Config
}
// NewOpenAPIScraper returns an OpenAPIScraper that holds the given config.
func NewOpenAPIScraper(config *Config) *OpenAPIScraper {
	scraper := new(OpenAPIScraper)
	scraper.config = config
	return scraper
}
// Scrape is meant to fetch and parse an OpenAPI specification.
//
// It is not implemented yet: ctx and source are ignored and the method
// always returns (nil, nil), so callers see an empty, error-free result.
func (s *OpenAPIScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
// TODO: Implement OpenAPI parsing
// 1. Fetch spec from URL
// 2. Parse endpoints, schemas, descriptions
// 3. Create documents per endpoint
// 4. Include authentication, parameters
return nil, nil
}
// DetectChanges is meant to report whether the spec has been updated.
//
// It is not implemented yet: all arguments are ignored and the method always
// returns (false, "", nil), i.e. "unchanged" with an empty hash.
func (s *OpenAPIScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
// TODO: Check spec content hash
return false, "", nil
}
+463
View File
@@ -0,0 +1,463 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/pythondocs"
)
// PythonDocsScraper fetches a Python module documentation page over HTTP and
// turns the parsed module, classes, methods, attributes, functions,
// exceptions, and data items into Documents.
type PythonDocsScraper struct {
// config supplies the User-Agent sent with every request.
config *Config
// parser converts the fetched HTML into a pythondocs.Module.
parser *pythondocs.Parser
// client performs the HTTP requests; its timeout is taken from config.
client *http.Client
}
// NewPythonDocsScraper returns a PythonDocsScraper bound to config, with a
// fresh pythondocs parser and an HTTP client whose Timeout is config.Timeout.
func NewPythonDocsScraper(config *Config) *PythonDocsScraper {
	scraper := &PythonDocsScraper{config: config, parser: pythondocs.NewParser()}
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads the module page at source.URL, parses it, and returns the
// module Document followed by, for each class, the class Document and then
// its methods, class methods, and attributes; after all classes come the
// module-level functions, exceptions, and constants.
//
// It returns an error when source.URL is empty, when the page cannot be
// fetched, or when the parser rejects the page.
func (s *PythonDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Python docs scraper")
	}

	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	module, err := s.parser.ParseModulePage(page, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse module: %w", err)
	}

	docs := []*Document{s.moduleToDocument(module, source.Name)}

	// Each class is immediately followed by the documents for its members.
	for _, class := range module.Classes {
		docs = append(docs, s.classToDocument(class, module, source.Name))
		for _, m := range class.Methods {
			docs = append(docs, s.methodToDocument(m, class, module, source.Name))
		}
		for _, cm := range class.ClassMethods {
			docs = append(docs, s.classMethodToDocument(cm, class, module, source.Name))
		}
		for _, a := range class.Attributes {
			docs = append(docs, s.attributeToDocument(a, class, module, source.Name))
		}
	}

	for _, fn := range module.Functions {
		docs = append(docs, s.functionToDocument(fn, module, source.Name))
	}
	for _, exc := range module.Exceptions {
		docs = append(docs, s.exceptionToDocument(exc, module, source.Name))
	}
	for _, data := range module.Constants {
		docs = append(docs, s.dataToDocument(data, module, source.Name))
	}
	return docs, nil
}
// DetectChanges re-fetches source.URL, hashes the raw page with generateHash,
// and reports whether that hash differs from lastHash. The freshly computed
// hash is returned so the caller can store it for the next comparison.
//
// As in Scrape, an empty source.URL is rejected up front and fetch failures
// are wrapped, so callers get the same diagnostics from both entry points
// instead of an opaque transport error for a missing URL.
func (s *PythonDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	if source.URL == "" {
		return false, "", fmt.Errorf("URL is required for Python docs scraper")
	}

	html, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", fmt.Errorf("failed to fetch page: %w", err)
	}

	hash := s.generateHash(html)
	return hash != lastHash, hash, nil
}
// fetchPage issues a GET for url using the configured User-Agent and returns
// the response body as a string. Any status other than 200 OK is reported as
// an "HTTP <code>" error; request, transport, and read errors are returned
// unchanged.
func (s *PythonDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	request.Header.Set("User-Agent", s.config.UserAgent)

	response, err := s.client.Do(request)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", code)
	}

	raw, err := io.ReadAll(response.Body)
	if err != nil {
		return "", err
	}
	return string(raw), nil
}
// generateHash returns the lowercase hex encoding of the SHA-256 digest of
// content.
func (s *PythonDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// moduleToDocument renders module as a "python-module" Document. The content
// comes from buildModuleContent; the metadata records the module's name,
// path, version, URL, and how many classes, functions, exceptions, and
// constants it contains.
func (s *PythonDocsScraper) moduleToDocument(module *pythondocs.Module, sourceName string) *Document {
	body := s.buildModuleContent(module)

	meta := map[string]interface{}{
		"name":            module.Name,
		"path":            module.Path,
		"version":         module.Version,
		"doc_url":         module.DocURL,
		"class_count":     len(module.Classes),
		"function_count":  len(module.Functions),
		"exception_count": len(module.Exceptions),
		"data_count":      len(module.Constants),
	}
	doc := &Document{
		ID:        generateDocID(module.DocURL),
		Source:    sourceName,
		Type:      "python-module",
		Title:     fmt.Sprintf("%s - Python", module.Name),
		Content:   body,
		URL:       module.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// buildModuleContent renders the markdown summary for module: a heading, the
// synopsis and documentation when present, then one counted bulleted list per
// member kind (classes, functions, exceptions, data) that has entries. The
// pieces are joined with single newlines.
//
// The data list covers module.Constants. The module metadata already reports
// them as "data_count" and Scrape emits a Document per constant, so listing
// them here keeps the summary consistent with what is indexed.
func (s *PythonDocsScraper) buildModuleContent(module *pythondocs.Module) string {
	parts := []string{fmt.Sprintf("# Module %s\n", module.Name)}

	if module.Synopsis != "" {
		parts = append(parts, module.Synopsis)
	}
	if module.Doc != "" {
		parts = append(parts, "\n"+module.Doc)
	}

	if n := len(module.Classes); n > 0 {
		parts = append(parts, fmt.Sprintf("\n## Classes (%d)\n", n))
		for _, class := range module.Classes {
			parts = append(parts, fmt.Sprintf("- `%s`", class.Name))
		}
	}
	if n := len(module.Functions); n > 0 {
		parts = append(parts, fmt.Sprintf("\n## Functions (%d)\n", n))
		for _, fn := range module.Functions {
			parts = append(parts, fmt.Sprintf("- `%s`", fn.Name))
		}
	}
	if n := len(module.Exceptions); n > 0 {
		parts = append(parts, fmt.Sprintf("\n## Exceptions (%d)\n", n))
		for _, exc := range module.Exceptions {
			parts = append(parts, fmt.Sprintf("- `%s`", exc.Name))
		}
	}
	if n := len(module.Constants); n > 0 {
		parts = append(parts, fmt.Sprintf("\n## Data (%d)\n", n))
		for _, data := range module.Constants {
			parts = append(parts, fmt.Sprintf("- `%s`", data.Name))
		}
	}

	return strings.Join(parts, "\n")
}
// classToDocument renders class as a "python-class" Document. The content
// comes from buildClassContent; the metadata records the module, class name,
// qualified name, bases, and the method and attribute counts.
func (s *PythonDocsScraper) classToDocument(class *pythondocs.Class, module *pythondocs.Module, sourceName string) *Document {
	body := s.buildClassContent(class, module)

	meta := map[string]interface{}{
		"module":          module.Name,
		"class":           class.Name,
		"qual_name":       class.QualName,
		"bases":           class.Bases,
		"method_count":    len(class.Methods),
		"attribute_count": len(class.Attributes),
	}
	doc := &Document{
		ID:        generateDocID(class.DocURL),
		Source:    sourceName,
		Type:      "python-class",
		Title:     fmt.Sprintf("%s.%s - Python", module.Name, class.Name),
		Content:   body,
		URL:       class.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// buildClassContent renders the markdown summary for class: a heading, an
// optional fenced python signature, the documentation, the base classes, and
// counted bulleted lists of methods, class methods, and attributes. Empty
// sections are omitted and the pieces are joined with single newlines.
func (s *PythonDocsScraper) buildClassContent(class *pythondocs.Class, module *pythondocs.Module) string {
	sections := []string{fmt.Sprintf("# class %s.%s\n", module.Name, class.Name)}

	if class.Signature != "" {
		sections = append(sections, fmt.Sprintf("```python\n%s\n```", class.Signature))
	}
	if class.Doc != "" {
		sections = append(sections, "\n"+class.Doc)
	}
	if len(class.Bases) > 0 {
		bases := strings.Join(class.Bases, ", ")
		sections = append(sections, fmt.Sprintf("\n**Bases:** %s\n", bases))
	}

	if n := len(class.Methods); n > 0 {
		sections = append(sections, fmt.Sprintf("\n### Methods (%d)\n", n))
		for _, method := range class.Methods {
			sections = append(sections, fmt.Sprintf("- `%s`", method.Name))
		}
	}
	if n := len(class.ClassMethods); n > 0 {
		sections = append(sections, fmt.Sprintf("\n### Class Methods (%d)\n", n))
		for _, method := range class.ClassMethods {
			sections = append(sections, fmt.Sprintf("- `%s` (classmethod)", method.Name))
		}
	}
	if n := len(class.Attributes); n > 0 {
		sections = append(sections, fmt.Sprintf("\n### Attributes (%d)\n", n))
		for _, attr := range class.Attributes {
			sections = append(sections, fmt.Sprintf("- `%s`", attr.Name))
		}
	}

	return strings.Join(sections, "\n")
}
// methodToDocument renders method as a "python-method" Document. The content
// holds the dotted module.class.method heading, an optional fenced python
// signature, and the documentation when present.
func (s *PythonDocsScraper) methodToDocument(method *pythondocs.Method, class *pythondocs.Class, module *pythondocs.Module, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s.%s.%s\n\n", module.Name, class.Name, method.Name)
	if method.Signature != "" {
		fmt.Fprintf(&sb, "```python\n%s\n```\n", method.Signature)
	}
	if method.Doc != "" {
		fmt.Fprintf(&sb, "%s\n", method.Doc)
	}
	body := sb.String()

	meta := map[string]interface{}{
		"module":    module.Name,
		"class":     class.Name,
		"method":    method.Name,
		"qual_name": method.QualName,
		"is_static": method.IsStatic,
		"is_async":  method.IsAsync,
	}
	return &Document{
		ID:        generateDocID(method.DocURL),
		Source:    sourceName,
		Type:      "python-method",
		Title:     fmt.Sprintf("%s.%s.%s - Python", module.Name, class.Name, method.Name),
		Content:   body,
		URL:       method.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// classMethodToDocument renders method as a "python-classmethod" Document.
// The content comes from buildMethodContent and the metadata always carries
// "is_classmethod": true.
func (s *PythonDocsScraper) classMethodToDocument(method *pythondocs.Method, class *pythondocs.Class, module *pythondocs.Module, sourceName string) *Document {
	body := s.buildMethodContent(method, class, module)

	meta := map[string]interface{}{
		"module":         module.Name,
		"class":          class.Name,
		"method":         method.Name,
		"qual_name":      method.QualName,
		"is_classmethod": true,
	}
	doc := &Document{
		ID:        generateDocID(method.DocURL),
		Source:    sourceName,
		Type:      "python-classmethod",
		Title:     fmt.Sprintf("%s.%s.%s (classmethod) - Python", module.Name, class.Name, method.Name),
		Content:   body,
		URL:       method.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// buildMethodContent renders the markdown for method: a dotted
// module.class.method heading, an optional fenced python signature, and the
// documentation when present, joined with single newlines.
func (s *PythonDocsScraper) buildMethodContent(method *pythondocs.Method, class *pythondocs.Class, module *pythondocs.Module) string {
	sections := []string{fmt.Sprintf("# %s.%s.%s\n", module.Name, class.Name, method.Name)}

	if method.Signature != "" {
		sections = append(sections, fmt.Sprintf("```python\n%s\n```", method.Signature))
	}
	if method.Doc != "" {
		sections = append(sections, "\n"+method.Doc)
	}

	return strings.Join(sections, "\n")
}
// attributeToDocument renders attr as a "python-attribute" Document. The
// content holds the dotted module.class.attribute heading and the
// documentation when present.
func (s *PythonDocsScraper) attributeToDocument(attr *pythondocs.Attribute, class *pythondocs.Class, module *pythondocs.Module, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s.%s.%s\n\n", module.Name, class.Name, attr.Name)
	if attr.Doc != "" {
		fmt.Fprintf(&sb, "%s\n", attr.Doc)
	}
	body := sb.String()

	meta := map[string]interface{}{
		"module": module.Name,
		"class":  class.Name,
		"attr":   attr.Name,
		"type":   attr.Type,
	}
	return &Document{
		ID:        generateDocID(attr.DocURL),
		Source:    sourceName,
		Type:      "python-attribute",
		Title:     fmt.Sprintf("%s.%s.%s - Python", module.Name, class.Name, attr.Name),
		Content:   body,
		URL:       attr.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// functionToDocument renders fn as a "python-function" Document. The content
// comes from buildFunctionContent; the metadata records the module, function
// name, qualified name, signature, and the async/generator flags.
func (s *PythonDocsScraper) functionToDocument(fn *pythondocs.Function, module *pythondocs.Module, sourceName string) *Document {
	body := s.buildFunctionContent(fn, module)

	meta := map[string]interface{}{
		"module":       module.Name,
		"function":     fn.Name,
		"qual_name":    fn.QualName,
		"signature":    fn.Signature,
		"is_async":     fn.IsAsync,
		"is_generator": fn.IsGenerator,
	}
	doc := &Document{
		ID:        generateDocID(fn.DocURL),
		Source:    sourceName,
		Type:      "python-function",
		Title:     fmt.Sprintf("%s.%s - Python", module.Name, fn.Name),
		Content:   body,
		URL:       fn.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
	return doc
}
// buildFunctionContent renders the markdown for fn: a dotted module.function
// heading, an optional fenced python signature, and the documentation when
// present, joined with single newlines.
func (s *PythonDocsScraper) buildFunctionContent(fn *pythondocs.Function, module *pythondocs.Module) string {
	sections := []string{fmt.Sprintf("# %s.%s\n", module.Name, fn.Name)}

	if fn.Signature != "" {
		sections = append(sections, fmt.Sprintf("```python\n%s\n```", fn.Signature))
	}
	if fn.Doc != "" {
		sections = append(sections, "\n"+fn.Doc)
	}

	return strings.Join(sections, "\n")
}
// exceptionToDocument renders exc as a "python-exception" Document. The
// content holds the dotted module.exception heading, an optional fenced
// python signature, and the documentation when present.
func (s *PythonDocsScraper) exceptionToDocument(exc *pythondocs.Exception, module *pythondocs.Module, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s.%s\n\n", module.Name, exc.Name)
	if exc.Signature != "" {
		fmt.Fprintf(&sb, "```python\n%s\n```\n", exc.Signature)
	}
	if exc.Doc != "" {
		fmt.Fprintf(&sb, "%s\n", exc.Doc)
	}
	body := sb.String()

	meta := map[string]interface{}{
		"module":    module.Name,
		"exception": exc.Name,
		"qual_name": exc.QualName,
		"bases":     exc.Bases,
	}
	return &Document{
		ID:        generateDocID(exc.DocURL),
		Source:    sourceName,
		Type:      "python-exception",
		Title:     fmt.Sprintf("%s.%s - Python", module.Name, exc.Name),
		Content:   body,
		URL:       exc.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// dataToDocument renders data as a "python-data" Document. The content holds
// the dotted module.name heading and the documentation when present; the
// metadata records the item's type and value.
func (s *PythonDocsScraper) dataToDocument(data *pythondocs.Data, module *pythondocs.Module, sourceName string) *Document {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# %s.%s\n\n", module.Name, data.Name)
	if data.Doc != "" {
		fmt.Fprintf(&sb, "%s\n", data.Doc)
	}
	body := sb.String()

	meta := map[string]interface{}{
		"module": module.Name,
		"data":   data.Name,
		"type":   data.Type,
		"value":  data.Value,
	}
	return &Document{
		ID:        generateDocID(data.DocURL),
		Source:    sourceName,
		Type:      "python-data",
		Title:     fmt.Sprintf("%s.%s - Python", module.Name, data.Name),
		Content:   body,
		URL:       data.DocURL,
		Metadata:  meta,
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
+214
View File
@@ -0,0 +1,214 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/reactdocs"
)
// ReactDocsScraper fetches a React API reference page over HTTP and turns
// the parsed hooks, components, and APIs into Documents.
type ReactDocsScraper struct {
// config supplies the User-Agent sent with every request.
config *Config
// parser converts the fetched HTML into a reactdocs.Reference.
parser *reactdocs.Parser
// client performs the HTTP requests; its timeout is taken from config.
client *http.Client
}
// NewReactDocsScraper returns a ReactDocsScraper bound to config, with a
// fresh reactdocs parser and an HTTP client whose Timeout is config.Timeout.
func NewReactDocsScraper(config *Config) *ReactDocsScraper {
	scraper := &ReactDocsScraper{config: config, parser: reactdocs.NewParser()}
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads the reference page at source.URL, parses it, and returns
// one summary Document followed by one Document per hook, component, and API
// reported by the parser, in that order.
//
// It returns an error when source.URL is empty, when the page cannot be
// fetched, or when the parser rejects the page.
func (s *ReactDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for React docs scraper")
	}

	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	ref, err := s.parser.ParseReferencePage(page, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse reference: %w", err)
	}

	// The reference summary always comes first; entries follow by kind.
	docs := []*Document{s.referenceToDocument(ref, source.Name)}
	for _, hook := range ref.Hooks {
		docs = append(docs, s.hookToDocument(hook, source.Name))
	}
	for _, component := range ref.Components {
		docs = append(docs, s.componentToDocument(component, source.Name))
	}
	for _, api := range ref.APIs {
		docs = append(docs, s.apiToDocument(api, source.Name))
	}
	return docs, nil
}
// DetectChanges re-fetches source.URL, hashes the raw page with generateHash,
// and reports whether that hash differs from lastHash. The freshly computed
// hash is returned so the caller can store it for the next comparison.
//
// As in Scrape, an empty source.URL is rejected up front and fetch failures
// are wrapped, so callers get the same diagnostics from both entry points
// instead of an opaque transport error for a missing URL.
func (s *ReactDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	if source.URL == "" {
		return false, "", fmt.Errorf("URL is required for React docs scraper")
	}

	html, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", fmt.Errorf("failed to fetch page: %w", err)
	}

	hash := s.generateHash(html)
	return hash != lastHash, hash, nil
}
// fetchPage issues a GET request for url, sending the configured User-Agent,
// and returns the response body as a string. Any status other than 200 OK is
// reported as an "HTTP <code>" error.
func (s *ReactDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	request.Header.Set("User-Agent", s.config.UserAgent)

	response, err := s.client.Do(request)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", code)
	}

	raw, err := io.ReadAll(response.Body)
	if err != nil {
		return "", err
	}
	return string(raw), nil
}
// generateHash returns the SHA-256 digest of content as a lowercase hex string.
func (s *ReactDocsScraper) generateHash(content string) string {
	digest := sha256.Sum256([]byte(content))
	encoded := hex.EncodeToString(digest[:])
	return encoded
}
// referenceToDocument builds the overview Document for a parsed reference:
// a fixed heading plus the number of hooks, components and APIs it contains.
func (s *ReactDocsScraper) referenceToDocument(ref *reactdocs.Reference, sourceName string) *Document {
	summary := fmt.Sprintf("Hooks: %d, Components: %d, APIs: %d\n", len(ref.Hooks), len(ref.Components), len(ref.APIs))
	body := "# React API Reference\n\n" + summary

	return &Document{
		ID:        generateDocID(ref.DocURL),
		Source:    sourceName,
		Type:      "react-reference",
		Title:     "React API Reference",
		Content:   body,
		URL:       ref.DocURL,
		Metadata:  map[string]interface{}{"doc_type": "react-reference"},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// hookToDocument renders a hook as markdown (heading, optional fenced
// signature, then the description) and wraps it in a "react-hook" Document
// whose ID is derived from the hook's documentation URL.
func (s *ReactDocsScraper) hookToDocument(hook *reactdocs.Hook, sourceName string) *Document {
	var md strings.Builder
	md.WriteString("# " + hook.Name + "\n\n")
	if sig := hook.Signature; sig != "" {
		fmt.Fprintf(&md, "```javascript\n%s\n```\n\n", sig)
	}
	fmt.Fprintf(&md, "%s\n", hook.Doc)
	body := md.String()

	return &Document{
		ID:      generateDocID(hook.DocURL),
		Source:  sourceName,
		Type:    "react-hook",
		Title:   hook.Name,
		Content: body,
		URL:     hook.DocURL,
		Metadata: map[string]interface{}{
			"name":     hook.Name,
			"category": hook.Category,
			"doc_url":  hook.DocURL,
			"doc_type": "react-hook",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// componentToDocument renders a component as markdown: a JSX-style heading,
// the description and, when the component has props, a "Props" list of
// `name: type` entries. The result is wrapped in a "react-component" Document.
func (s *ReactDocsScraper) componentToDocument(comp *reactdocs.Component, sourceName string) *Document {
	var md strings.Builder
	md.WriteString("# <" + comp.Name + " />\n\n")
	fmt.Fprintf(&md, "%s\n", comp.Doc)
	if len(comp.Props) != 0 {
		md.WriteString("\n## Props\n")
		for _, prop := range comp.Props {
			fmt.Fprintf(&md, "- `%s: %s`\n", prop.Name, prop.Type)
		}
	}
	body := md.String()

	return &Document{
		ID:      generateDocID(comp.DocURL),
		Source:  sourceName,
		Type:    "react-component",
		Title:   comp.Name,
		Content: body,
		URL:     comp.DocURL,
		Metadata: map[string]interface{}{
			"name":     comp.Name,
			"doc_url":  comp.DocURL,
			"doc_type": "react-component",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// apiToDocument renders an API entry as markdown (heading with call
// parentheses, optional fenced signature, description) and wraps it in a
// "react-api" Document.
func (s *ReactDocsScraper) apiToDocument(api *reactdocs.API, sourceName string) *Document {
	var md strings.Builder
	md.WriteString("# " + api.Name + "()\n\n")
	if sig := api.Signature; sig != "" {
		fmt.Fprintf(&md, "```javascript\n%s\n```\n\n", sig)
	}
	fmt.Fprintf(&md, "%s\n", api.Doc)
	body := md.String()

	return &Document{
		ID:      generateDocID(api.DocURL),
		Source:  sourceName,
		Type:    "react-api",
		Title:   api.Name,
		Content: body,
		URL:     api.DocURL,
		Metadata: map[string]interface{}{
			"name":     api.Name,
			"doc_url":  api.DocURL,
			"doc_type": "react-api",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
+563
View File
@@ -0,0 +1,563 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/rustdocs"
)
// RustDocsScraper fetches a Rust crate documentation page over HTTP and
// converts the parsed crate and its items into Documents.
type RustDocsScraper struct {
// config supplies the request User-Agent and the HTTP client timeout.
config *Config
// parser turns the fetched HTML into a rustdocs crate description.
parser *rustdocs.Parser
// client performs the page downloads in fetchPage.
client *http.Client
}
// NewRustDocsScraper returns a RustDocsScraper bound to config, with a new
// rustdocs parser and an HTTP client that times out after config.Timeout.
func NewRustDocsScraper(config *Config) *RustDocsScraper {
	scraper := &RustDocsScraper{
		config: config,
		parser: rustdocs.NewParser(),
	}
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads the crate page at source.URL, parses it and returns one
// Document for the crate followed by one per module, struct, enum, trait,
// function, macro, constant and static, in that order.
//
// An error is returned when source.URL is empty, when the download fails, or
// when the parser rejects the page.
func (s *RustDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Rust docs scraper")
	}

	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	crate, err := s.parser.ParseCratePage(page, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse crate: %w", err)
	}

	name := source.Name
	// The crate overview always comes first.
	result := []*Document{s.crateToDocument(crate, name)}
	collect := func(doc *Document) { result = append(result, doc) }

	for _, module := range crate.Modules {
		collect(s.moduleToDocument(module, crate, name))
	}
	for _, strct := range crate.Structs {
		collect(s.structToDocument(strct, crate, name))
	}
	for _, enum := range crate.Enums {
		collect(s.enumToDocument(enum, crate, name))
	}
	for _, trait := range crate.Traits {
		collect(s.traitToDocument(trait, crate, name))
	}
	for _, fn := range crate.Functions {
		collect(s.funcToDocument(fn, crate, name))
	}
	for _, macro := range crate.Macros {
		collect(s.macroToDocument(macro, crate, name))
	}
	for _, constant := range crate.Constants {
		collect(s.constToDocument(constant, crate, name))
	}
	for _, static := range crate.Statics {
		collect(s.staticToDocument(static, crate, name))
	}
	return result, nil
}
// DetectChanges downloads source.URL again, hashes the raw page and reports
// whether the hash differs from lastHash, along with the new hash. On a
// fetch failure it returns false, an empty hash and the error as-is.
func (s *RustDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", err
	}

	newHash := s.generateHash(page)
	if newHash == lastHash {
		return false, newHash, nil
	}
	return true, newHash, nil
}
// fetchPage performs a GET request for url with the configured User-Agent
// and returns the whole response body as a string. A status other than
// 200 OK produces an "HTTP <code>" error.
func (s *RustDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	request.Header.Set("User-Agent", s.config.UserAgent)

	response, err := s.client.Do(request)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", response.StatusCode)
	}

	payload, err := io.ReadAll(response.Body)
	if err != nil {
		return "", err
	}
	return string(payload), nil
}
// generateHash returns the hex-encoded SHA-256 digest of content.
func (s *RustDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// crateToDocument wraps the crate summary produced by buildCrateContent in a
// "rust-crate" Document. The metadata records the crate's identifying fields
// and the number of items of each kind.
func (s *RustDocsScraper) crateToDocument(crate *rustdocs.Crate, sourceName string) *Document {
	body := s.buildCrateContent(crate)

	return &Document{
		ID:      generateDocID(crate.DocURL),
		Source:  sourceName,
		Type:    "rust-crate",
		Title:   fmt.Sprintf("%s - Rust", crate.Name),
		Content: body,
		URL:     crate.DocURL,
		Metadata: map[string]interface{}{
			"name":           crate.Name,
			"version":        crate.Version,
			"repository":     crate.Repository,
			"license":        crate.License,
			"doc_url":        crate.DocURL,
			"module_count":   len(crate.Modules),
			"struct_count":   len(crate.Structs),
			"enum_count":     len(crate.Enums),
			"trait_count":    len(crate.Traits),
			"function_count": len(crate.Functions),
			"macro_count":    len(crate.Macros),
			"constant_count": len(crate.Constants),
			"static_count":   len(crate.Statics),
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// buildCrateContent renders the markdown summary of a crate: a heading, the
// optional version and description, and one "## <Kind> (<count>)" section
// with a bullet per item for every non-empty item kind.
//
// Constants and statics are listed as well, so the summary covers the same
// item kinds that crateToDocument counts in its metadata and that Scrape
// emits documents for.
//
// The pieces are joined with a single newline.
func (s *RustDocsScraper) buildCrateContent(crate *rustdocs.Crate) string {
	var parts []string

	// heading starts a section; bullet adds one item name to it.
	heading := func(title string, count int) {
		parts = append(parts, fmt.Sprintf("\n## %s (%d)\n", title, count))
	}
	bullet := func(name interface{}) {
		parts = append(parts, fmt.Sprintf("- `%s`", name))
	}

	parts = append(parts, fmt.Sprintf("# Crate %s\n", crate.Name))
	if crate.Version != "" {
		parts = append(parts, fmt.Sprintf("Version: %s\n", crate.Version))
	}
	if crate.Description != "" {
		parts = append(parts, crate.Description)
	}

	if n := len(crate.Modules); n > 0 {
		heading("Modules", n)
		for _, m := range crate.Modules {
			bullet(m.Name)
		}
	}
	if n := len(crate.Structs); n > 0 {
		heading("Structs", n)
		for _, st := range crate.Structs {
			bullet(st.Name)
		}
	}
	if n := len(crate.Enums); n > 0 {
		heading("Enums", n)
		for _, e := range crate.Enums {
			bullet(e.Name)
		}
	}
	if n := len(crate.Traits); n > 0 {
		heading("Traits", n)
		for _, t := range crate.Traits {
			bullet(t.Name)
		}
	}
	if n := len(crate.Functions); n > 0 {
		heading("Functions", n)
		for _, f := range crate.Functions {
			bullet(f.Name)
		}
	}
	if n := len(crate.Macros); n > 0 {
		heading("Macros", n)
		for _, m := range crate.Macros {
			bullet(m.Name)
		}
	}
	if n := len(crate.Constants); n > 0 {
		heading("Constants", n)
		for _, c := range crate.Constants {
			bullet(c.Name)
		}
	}
	if n := len(crate.Statics); n > 0 {
		heading("Statics", n)
		for _, st := range crate.Statics {
			bullet(st.Name)
		}
	}

	return strings.Join(parts, "\n")
}
// moduleToDocument renders a module as markdown (a "crate::module" heading
// and, when present, its description) and wraps it in a "rust-module"
// Document.
func (s *RustDocsScraper) moduleToDocument(m *rustdocs.Module, crate *rustdocs.Crate, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# Module %s::%s\n\n", crate.Name, m.Name)
	if m.Doc != "" {
		fmt.Fprintf(&md, "%s\n", m.Doc)
	}
	body := md.String()

	return &Document{
		ID:      generateDocID(m.DocURL),
		Source:  sourceName,
		Type:    "rust-module",
		Title:   fmt.Sprintf("%s::%s - Rust", crate.Name, m.Name),
		Content: body,
		URL:     m.DocURL,
		Metadata: map[string]interface{}{
			"crate":           crate.Name,
			"module":          m.Name,
			"path":            m.Path,
			"is_experimental": m.IsExperimental,
			"kind":            "module",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// structToDocument wraps the markdown from buildStructContent in a
// "rust-struct" Document. The struct's fields are stored in the metadata as
// a JSON-encoded string under "fields".
func (s *RustDocsScraper) structToDocument(st *rustdocs.Struct, crate *rustdocs.Crate, sourceName string) *Document {
	body := s.buildStructContent(st, crate)

	// The marshal error is deliberately discarded: on failure "fields" is
	// stored as an empty string.
	// NOTE(review): confirm that silently dropping the field list is acceptable.
	encodedFields, _ := json.Marshal(st.Fields)

	return &Document{
		ID:      generateDocID(st.DocURL),
		Source:  sourceName,
		Type:    "rust-struct",
		Title:   fmt.Sprintf("%s::%s - Rust", crate.Name, st.Name),
		Content: body,
		URL:     st.DocURL,
		Metadata: map[string]interface{}{
			"crate":           crate.Name,
			"struct":          st.Name,
			"path":            st.Path,
			"is_experimental": st.IsExperimental,
			"kind":            "struct",
			"declaration":     st.Declaration,
			"fields":          string(encodedFields),
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// buildStructContent renders a struct as markdown: heading, optional fenced
// declaration, optional description, a "Fields" list (each entry followed by
// its description when it has one) and a "Methods" list of method names.
// The pieces are joined with a single newline.
func (s *RustDocsScraper) buildStructContent(st *rustdocs.Struct, crate *rustdocs.Crate) string {
	lines := []string{fmt.Sprintf("# struct %s::%s\n", crate.Name, st.Name)}

	if st.Declaration != "" {
		lines = append(lines, fmt.Sprintf("```rust\n%s\n```", st.Declaration))
	}
	if st.Doc != "" {
		lines = append(lines, "\n"+st.Doc)
	}

	if len(st.Fields) != 0 {
		lines = append(lines, "\n### Fields\n")
		for _, field := range st.Fields {
			entry := fmt.Sprintf("- `%s: %s`", field.Name, field.Type)
			if field.Doc != "" {
				entry = fmt.Sprintf("%s - %s", entry, field.Doc)
			}
			lines = append(lines, entry)
		}
	}

	if count := len(st.Methods); count != 0 {
		lines = append(lines, fmt.Sprintf("\n### Methods (%d)\n", count))
		for _, method := range st.Methods {
			lines = append(lines, fmt.Sprintf("- `%s`", method.Name))
		}
	}

	return strings.Join(lines, "\n")
}
// enumToDocument wraps the markdown from buildEnumContent in a "rust-enum"
// Document whose ID is derived from the enum's documentation URL.
func (s *RustDocsScraper) enumToDocument(e *rustdocs.Enum, crate *rustdocs.Crate, sourceName string) *Document {
	body := s.buildEnumContent(e, crate)

	return &Document{
		ID:      generateDocID(e.DocURL),
		Source:  sourceName,
		Type:    "rust-enum",
		Title:   fmt.Sprintf("%s::%s - Rust", crate.Name, e.Name),
		Content: body,
		URL:     e.DocURL,
		Metadata: map[string]interface{}{
			"crate":           crate.Name,
			"enum":            e.Name,
			"path":            e.Path,
			"is_experimental": e.IsExperimental,
			"kind":            "enum",
			"declaration":     e.Declaration,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// buildEnumContent renders an enum as markdown: heading, optional fenced
// declaration, optional description and a "Variants" list in which each
// variant is followed by its description when it has one. The pieces are
// joined with a single newline.
func (s *RustDocsScraper) buildEnumContent(e *rustdocs.Enum, crate *rustdocs.Crate) string {
	lines := []string{fmt.Sprintf("# enum %s::%s\n", crate.Name, e.Name)}

	if e.Declaration != "" {
		lines = append(lines, fmt.Sprintf("```rust\n%s\n```", e.Declaration))
	}
	if e.Doc != "" {
		lines = append(lines, "\n"+e.Doc)
	}

	if len(e.Variants) != 0 {
		lines = append(lines, "\n### Variants\n")
		for _, variant := range e.Variants {
			entry := fmt.Sprintf("- `%s`", variant.Name)
			if variant.Doc != "" {
				entry = fmt.Sprintf("%s - %s", entry, variant.Doc)
			}
			lines = append(lines, entry)
		}
	}

	return strings.Join(lines, "\n")
}
// traitToDocument wraps the markdown from buildTraitContent in a "rust-trait"
// Document whose ID is derived from the trait's documentation URL.
func (s *RustDocsScraper) traitToDocument(t *rustdocs.Trait, crate *rustdocs.Crate, sourceName string) *Document {
	body := s.buildTraitContent(t, crate)

	return &Document{
		ID:      generateDocID(t.DocURL),
		Source:  sourceName,
		Type:    "rust-trait",
		Title:   fmt.Sprintf("%s::%s - Rust", crate.Name, t.Name),
		Content: body,
		URL:     t.DocURL,
		Metadata: map[string]interface{}{
			"crate":           crate.Name,
			"trait":           t.Name,
			"path":            t.Path,
			"is_experimental": t.IsExperimental,
			"kind":            "trait",
			"declaration":     t.Declaration,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// buildTraitContent renders a trait as markdown: heading, optional fenced
// declaration, optional description and a "Required Methods" list of method
// signatures. The pieces are joined with a single newline.
func (s *RustDocsScraper) buildTraitContent(t *rustdocs.Trait, crate *rustdocs.Crate) string {
	lines := []string{fmt.Sprintf("# trait %s::%s\n", crate.Name, t.Name)}

	if t.Declaration != "" {
		lines = append(lines, fmt.Sprintf("```rust\n%s\n```", t.Declaration))
	}
	if t.Doc != "" {
		lines = append(lines, "\n"+t.Doc)
	}

	if count := len(t.Methods); count != 0 {
		lines = append(lines, fmt.Sprintf("\n### Required Methods (%d)\n", count))
		for _, method := range t.Methods {
			lines = append(lines, fmt.Sprintf("- `%s`", method.Signature))
		}
	}

	return strings.Join(lines, "\n")
}
// funcToDocument renders a free function as markdown (heading, optional
// fenced signature, optional description) and wraps it in a "rust-fn"
// Document. The unsafe/const/async flags are recorded in the metadata.
func (s *RustDocsScraper) funcToDocument(f *rustdocs.Func, crate *rustdocs.Crate, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# fn %s::%s\n\n", crate.Name, f.Name)
	if f.Signature != "" {
		fmt.Fprintf(&md, "```rust\n%s\n```\n", f.Signature)
	}
	if f.Doc != "" {
		fmt.Fprintf(&md, "%s\n", f.Doc)
	}
	body := md.String()

	return &Document{
		ID:      generateDocID(f.DocURL),
		Source:  sourceName,
		Type:    "rust-fn",
		Title:   fmt.Sprintf("%s::%s - Rust", crate.Name, f.Name),
		Content: body,
		URL:     f.DocURL,
		Metadata: map[string]interface{}{
			"crate":           crate.Name,
			"function":        f.Name,
			"path":            f.Path,
			"is_experimental": f.IsExperimental,
			"is_unsafe":       f.IsUnsafe,
			"is_const":        f.IsConst,
			"is_async":        f.IsAsync,
			"kind":            "fn",
			"signature":       f.Signature,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// macroToDocument renders a macro as markdown (a "crate::name!" heading,
// optional fenced signature, optional description) and wraps it in a
// "rust-macro" Document.
func (s *RustDocsScraper) macroToDocument(m *rustdocs.Macro, crate *rustdocs.Crate, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# macro %s::%s!\n\n", crate.Name, m.Name)
	if m.Signature != "" {
		fmt.Fprintf(&md, "```rust\n%s\n```\n", m.Signature)
	}
	if m.Doc != "" {
		fmt.Fprintf(&md, "%s\n", m.Doc)
	}
	body := md.String()

	return &Document{
		ID:      generateDocID(m.DocURL),
		Source:  sourceName,
		Type:    "rust-macro",
		Title:   fmt.Sprintf("%s::%s! - Rust", crate.Name, m.Name),
		Content: body,
		URL:     m.DocURL,
		Metadata: map[string]interface{}{
			"crate":           crate.Name,
			"macro":           m.Name,
			"path":            m.Path,
			"is_experimental": m.IsExperimental,
			"kind":            "macro",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// constToDocument renders a constant as markdown (heading, optional type and
// value lines, optional description) and wraps it in a "rust-const" Document.
func (s *RustDocsScraper) constToDocument(c *rustdocs.Const, crate *rustdocs.Crate, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# const %s::%s\n\n", crate.Name, c.Name)
	if c.Type != "" {
		fmt.Fprintf(&md, "Type: `%s`\n", c.Type)
	}
	if c.Value != "" {
		fmt.Fprintf(&md, "Value: `%s`\n", c.Value)
	}
	if c.Doc != "" {
		fmt.Fprintf(&md, "\n%s\n", c.Doc)
	}
	body := md.String()

	return &Document{
		ID:      generateDocID(c.DocURL),
		Source:  sourceName,
		Type:    "rust-const",
		Title:   fmt.Sprintf("%s::%s - Rust", crate.Name, c.Name),
		Content: body,
		URL:     c.DocURL,
		Metadata: map[string]interface{}{
			"crate":           crate.Name,
			"const":           c.Name,
			"path":            c.Path,
			"is_experimental": c.IsExperimental,
			"type":            c.Type,
			"value":           c.Value,
			"kind":            "const",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// staticToDocument renders a static item as markdown (heading, optional type
// line, a mutability line for mutable statics, optional description) and
// wraps it in a "rust-static" Document.
func (s *RustDocsScraper) staticToDocument(st *rustdocs.Static, crate *rustdocs.Crate, sourceName string) *Document {
	var md strings.Builder
	fmt.Fprintf(&md, "# static %s::%s\n\n", crate.Name, st.Name)
	if st.Type != "" {
		fmt.Fprintf(&md, "Type: `%s`\n", st.Type)
	}
	if st.IsMutable {
		md.WriteString("Mutability: mutable\n")
	}
	if st.Doc != "" {
		fmt.Fprintf(&md, "\n%s\n", st.Doc)
	}
	body := md.String()

	return &Document{
		ID:      generateDocID(st.DocURL),
		Source:  sourceName,
		Type:    "rust-static",
		Title:   fmt.Sprintf("%s::%s - Rust", crate.Name, st.Name),
		Content: body,
		URL:     st.DocURL,
		Metadata: map[string]interface{}{
			"crate":           crate.Name,
			"static":          st.Name,
			"path":            st.Path,
			"is_experimental": st.IsExperimental,
			"is_mutable":      st.IsMutable,
			"type":            st.Type,
			"kind":            "static",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
+129
View File
@@ -0,0 +1,129 @@
// Package scraper provides document scraping capabilities for various sources.
package scraper
import (
"context"
"time"
)
// SourceType represents the type of documentation source. NewScraper uses
// it to choose the scraper implementation.
type SourceType string
// Known source types. The string values are the identifiers a Source carries
// in its "type" field.
const (
// SourceTypeWeb and SourceTypeLocal are the two values DetectSourceType
// can return.
SourceTypeWeb SourceType = "url"
SourceTypeGitHub SourceType = "github"
SourceTypeOpenAPI SourceType = "openapi"
SourceTypeLocal SourceType = "local"
SourceTypeGoDocs SourceType = "godocs"
SourceTypeRustDocs SourceType = "rustdocs"
SourceTypePythonDocs SourceType = "pythondocs"
SourceTypeJavaDocs SourceType = "javadocs"
SourceTypeSpringDocs SourceType = "springdocs"
// NOTE(review): NewScraper has no case for SourceTypeSpringAIDocs.
SourceTypeSpringAIDocs SourceType = "springaidocs"
SourceTypeTSDocs SourceType = "tsdocs"
SourceTypeReactDocs SourceType = "reactdocs"
SourceTypeVueDocs SourceType = "vuedocs"
SourceTypeNuxtDocs SourceType = "nuxtdocs"
SourceTypeMCPDocs SourceType = "mcpdocs"
SourceTypeDockerDocs SourceType = "dockerdocs"
SourceTypeCloudflareDocs SourceType = "cloudflaredocs"
SourceTypeAstroDocs SourceType = "astrodocs"
)
// Source represents a documentation source to scrape. It is loaded from
// YAML; every field except Name and Type is optional there.
type Source struct {
// Name is copied into the Source field of every Document scraped from
// this source.
Name string `yaml:"name"`
// Type identifies the kind of source.
Type SourceType `yaml:"type"`
// URL is the page the HTTP-based docs scrapers download; they reject an
// empty URL in Scrape.
URL string `yaml:"url,omitempty"`
// The remaining fields are not read by the scrapers shown alongside this
// type; presumably they serve the GitHub/local scrapers and scheduling —
// TODO confirm.
Repo string `yaml:"repo,omitempty"`
Branch string `yaml:"branch,omitempty"`
Path string `yaml:"path,omitempty"`
Include []string `yaml:"include,omitempty"`
Exclude []string `yaml:"exclude,omitempty"`
Schedule string `yaml:"schedule,omitempty"`
}
// Document represents a scraped document: one rendered piece of
// documentation together with where it came from.
type Document struct {
// ID is produced by generateDocID, normally from the item's URL.
ID string `json:"id"`
// Source is the name of the Source the document was scraped from.
Source string `json:"source"`
// Type is a scraper-specific kind such as "react-hook" or "rust-struct".
Type string `json:"type"`
Title string `json:"title"`
// Content is the rendered markdown text.
Content string `json:"content"`
URL string `json:"url,omitempty"`
// Metadata holds scraper-specific key/value details about the item.
Metadata map[string]interface{} `json:"metadata"`
// Hash is the hex-encoded SHA-256 digest of Content.
Hash string `json:"hash"`
// Timestamp is the time the document was built.
Timestamp time.Time `json:"timestamp"`
}
// Config holds scraper configuration, loaded from YAML.
type Config struct {
// UserAgent is sent as the User-Agent header of every page request.
UserAgent string `yaml:"user_agent"`
// Timeout becomes the timeout of each scraper's HTTP client.
Timeout time.Duration `yaml:"timeout"`
// The fields below are not read by the scrapers shown alongside this
// type; their exact semantics are defined by whichever code consumes
// them — TODO confirm.
RetryCount int `yaml:"retry_count"`
RetryDelay time.Duration `yaml:"retry_delay"`
Concurrency int `yaml:"concurrency"`
RateLimit time.Duration `yaml:"rate_limit"`
MaxDepth int `yaml:"max_depth"`
CacheDir string `yaml:"cache_dir"`
}
// Scraper defines the interface for document scrapers. NewScraper returns
// the implementation that matches a SourceType.
type Scraper interface {
// Scrape fetches and parses documents from the source and returns them,
// or an error if fetching or parsing fails.
Scrape(ctx context.Context, source *Source) ([]*Document, error)
// DetectChanges checks if the source has changed since last scrape.
// It returns whether the current hash differs from lastHash, the current
// hash itself, and any error hit while computing it.
DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error)
}
// NewScraper creates a new scraper for the given source type, configured
// with config.
//
// It returns nil when sourceType has no case below. That currently includes
// SourceTypeSpringAIDocs, which is declared as a constant but not handled
// here, so callers must check the result before using it.
func NewScraper(sourceType SourceType, config *Config) Scraper {
	var scraper Scraper

	switch sourceType {
	case SourceTypeWeb:
		scraper = NewWebScraper(config)
	case SourceTypeGitHub:
		scraper = NewGitHubScraper(config)
	case SourceTypeOpenAPI:
		scraper = NewOpenAPIScraper(config)
	case SourceTypeLocal:
		scraper = NewLocalScraper(config)
	case SourceTypeGoDocs:
		scraper = NewGoDocsScraper(config)
	case SourceTypeRustDocs:
		scraper = NewRustDocsScraper(config)
	case SourceTypePythonDocs:
		scraper = NewPythonDocsScraper(config)
	case SourceTypeJavaDocs:
		scraper = NewJavaDocsScraper(config)
	case SourceTypeSpringDocs:
		scraper = NewSpringDocsScraper(config)
	case SourceTypeTSDocs:
		scraper = NewTSDocsScraper(config)
	case SourceTypeReactDocs:
		scraper = NewReactDocsScraper(config)
	case SourceTypeVueDocs:
		scraper = NewVueDocsScraper(config)
	case SourceTypeNuxtDocs:
		scraper = NewNuxtDocsScraper(config)
	case SourceTypeMCPDocs:
		scraper = NewMCPDocsScraper(config)
	case SourceTypeDockerDocs:
		scraper = NewDockerDocsScraper(config)
	case SourceTypeCloudflareDocs:
		scraper = NewCloudflareDocsScraper(config)
	case SourceTypeAstroDocs:
		scraper = NewAstroDocsScraper(config)
	}

	// Unmatched types fall through with the nil interface value.
	return scraper
}
// DetectSourceType determines the source type from a URL or path.
//
// Inputs that begin with the "http://" or "https://" scheme are reported as
// SourceTypeWeb. Everything else, including the empty string, is treated as
// a local path and reported as SourceTypeLocal. The match is case-sensitive.
//
// TODO: Implement detection of the remaining source types.
func DetectSourceType(input string) SourceType {
	// Require the complete scheme separator so that local names that merely
	// start with "http" (for example "httpd.conf") are not taken for URLs.
	// The prefix is compared by hand because this file does not import
	// the strings package.
	for _, scheme := range [...]string{"http://", "https://"} {
		if len(input) >= len(scheme) && input[:len(scheme)] == scheme {
			return SourceTypeWeb
		}
	}
	return SourceTypeLocal
}
+221
View File
@@ -0,0 +1,221 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/springdocs"
)
// SpringDocsScraper fetches a Spring module documentation page over HTTP and
// converts the parsed module (classes, properties and guides) into Documents.
type SpringDocsScraper struct {
// config supplies the request User-Agent and the HTTP client timeout.
config *Config
// parser turns the fetched HTML into a springdocs module description.
parser *springdocs.Parser
// client performs the page downloads in fetchPage.
client *http.Client
}
// NewSpringDocsScraper returns a SpringDocsScraper bound to config, with a
// new springdocs parser and an HTTP client that times out after
// config.Timeout.
func NewSpringDocsScraper(config *Config) *SpringDocsScraper {
	scraper := &SpringDocsScraper{
		config: config,
		parser: springdocs.NewParser(),
	}
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads the module page at source.URL, parses it and returns one
// Document for the module followed by one per class, configuration property
// and guide, in that order.
//
// An error is returned when source.URL is empty, when the download fails, or
// when the parser rejects the page.
func (s *SpringDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Spring docs scraper")
	}

	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	parsed, err := s.parser.ParseModulePage(page, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse module: %w", err)
	}

	// The module overview always comes first.
	result := []*Document{s.moduleToDocument(parsed, source.Name)}
	for _, cls := range parsed.Classes {
		result = append(result, s.classToDocument(cls, parsed, source.Name))
	}
	for _, property := range parsed.Properties {
		result = append(result, s.propertyToDocument(property, source.Name))
	}
	for _, g := range parsed.Guides {
		result = append(result, s.guideToDocument(g, source.Name))
	}
	return result, nil
}
// DetectChanges downloads source.URL again, hashes the raw page and reports
// whether the hash differs from lastHash, along with the new hash. A fetch
// failure is returned as-is with false and an empty hash.
func (s *SpringDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", err
	}

	current := s.generateHash(page)
	return current != lastHash, current, nil
}
// fetchPage performs a GET request for url with the configured User-Agent
// and returns the whole response body as a string. A status other than
// 200 OK produces an "HTTP <code>" error.
func (s *SpringDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	request.Header.Set("User-Agent", s.config.UserAgent)

	response, err := s.client.Do(request)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	if status := response.StatusCode; status != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", status)
	}

	data, err := io.ReadAll(response.Body)
	if err != nil {
		return "", err
	}
	return string(data), nil
}
// generateHash returns the SHA-256 digest of content as a lowercase hex string.
func (s *SpringDocsScraper) generateHash(content string) string {
	sum := sha256.Sum256([]byte(content))
	hexDigest := hex.EncodeToString(sum[:])
	return hexDigest
}
// moduleToDocument renders the module overview as markdown (a heading with
// the module name followed by its description) and wraps it in a
// "spring-module" Document.
func (s *SpringDocsScraper) moduleToDocument(module *springdocs.Module, sourceName string) *Document {
	var md strings.Builder
	md.WriteString("# " + module.Name + "\n\n")
	fmt.Fprintf(&md, "%s\n", module.Doc)
	body := md.String()

	return &Document{
		ID:      generateDocID(module.DocURL),
		Source:  sourceName,
		Type:    "spring-module",
		Title:   module.Name,
		Content: body,
		URL:     module.DocURL,
		Metadata: map[string]interface{}{
			"module":   module.Name,
			"version":  module.Version,
			"doc_url":  module.DocURL,
			"doc_type": "spring-module",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// classToDocument renders a class as markdown (qualified-name heading,
// description and, when the class has methods, a "Methods" list of
// signatures) and wraps it in a "spring-class" Document.
func (s *SpringDocsScraper) classToDocument(class *springdocs.Class, module *springdocs.Module, sourceName string) *Document {
	var md strings.Builder
	md.WriteString("# " + class.QualifiedName + "\n\n")
	fmt.Fprintf(&md, "%s\n", class.Doc)
	if len(class.Methods) != 0 {
		md.WriteString("\n## Methods\n")
		for _, method := range class.Methods {
			fmt.Fprintf(&md, "- `%s`\n", method.Signature)
		}
	}
	body := md.String()

	return &Document{
		ID:      generateDocID(class.DocURL),
		Source:  sourceName,
		Type:    "spring-class",
		Title:   class.QualifiedName,
		Content: body,
		URL:     class.DocURL,
		Metadata: map[string]interface{}{
			"module":         module.Name,
			"qualified_name": class.QualifiedName,
			"kind":           class.Kind,
			"doc_url":        class.DocURL,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// propertyToDocument renders a configuration property as markdown (name
// heading, type, optional default value, description) and wraps it in a
// "spring-property" Document.
//
// Unlike the other converters, the document ID is derived from the property
// name rather than its URL.
// NOTE(review): presumably several properties share one documentation page;
// confirm before changing.
func (s *SpringDocsScraper) propertyToDocument(prop *springdocs.Property, sourceName string) *Document {
	var md strings.Builder
	md.WriteString("# " + prop.Name + "\n\n")
	fmt.Fprintf(&md, "Type: %s\n", prop.Type)
	if def := prop.Default; def != "" {
		fmt.Fprintf(&md, "Default: `%s`\n", def)
	}
	fmt.Fprintf(&md, "\n%s\n", prop.Doc)
	body := md.String()

	return &Document{
		ID:      generateDocID(prop.Name),
		Source:  sourceName,
		Type:    "spring-property",
		Title:   prop.Name,
		Content: body,
		URL:     prop.DocURL,
		Metadata: map[string]interface{}{
			"property": prop.Name,
			"type":     prop.Type,
			"default":  prop.Default,
			"doc_url":  prop.DocURL,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// guideToDocument renders a guide as markdown (title heading followed by its
// description) and wraps it in a "spring-guide" Document.
func (s *SpringDocsScraper) guideToDocument(guide *springdocs.Guide, sourceName string) *Document {
	var md strings.Builder
	md.WriteString("# " + guide.Title + "\n\n")
	fmt.Fprintf(&md, "%s\n", guide.Description)
	body := md.String()

	return &Document{
		ID:      generateDocID(guide.DocURL),
		Source:  sourceName,
		Type:    "spring-guide",
		Title:   guide.Title,
		Content: body,
		URL:     guide.DocURL,
		Metadata: map[string]interface{}{
			"title":    guide.Title,
			"doc_url":  guide.DocURL,
			"level":    guide.Level,
			"doc_type": "spring-guide",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
+261
View File
@@ -0,0 +1,261 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/tsdocs"
)
// TSDocsScraper fetches a TypeScript module documentation page over HTTP and
// converts the parsed module (interfaces, functions, classes and type
// aliases) into Documents.
type TSDocsScraper struct {
// config supplies the request User-Agent and the HTTP client timeout.
config *Config
// parser turns the fetched HTML into a tsdocs module description.
parser *tsdocs.Parser
// client performs the page downloads in fetchPage.
client *http.Client
}
// NewTSDocsScraper returns a TSDocsScraper bound to config, with a new
// tsdocs parser and an HTTP client that times out after config.Timeout.
func NewTSDocsScraper(config *Config) *TSDocsScraper {
	scraper := &TSDocsScraper{
		config: config,
		parser: tsdocs.NewParser(),
	}
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads the module page at source.URL, parses it and returns one
// Document for the module followed by one per interface, function, class and
// type alias, in that order.
//
// An error is returned when source.URL is empty, when the download fails, or
// when the parser rejects the page.
func (s *TSDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for TypeScript docs scraper")
	}

	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	parsed, err := s.parser.ParseModulePage(page, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse module: %w", err)
	}

	name := source.Name
	// The module overview always comes first.
	result := []*Document{s.moduleToDocument(parsed, name)}
	collect := func(doc *Document) { result = append(result, doc) }

	for _, iface := range parsed.Interfaces {
		collect(s.interfaceToDocument(iface, parsed, name))
	}
	for _, fn := range parsed.Functions {
		collect(s.functionToDocument(fn, parsed, name))
	}
	for _, cls := range parsed.Classes {
		collect(s.classToDocument(cls, parsed, name))
	}
	for _, alias := range parsed.Types {
		collect(s.typeAliasToDocument(alias, parsed, name))
	}
	return result, nil
}
// DetectChanges downloads source.URL again, hashes the raw page and reports
// whether the hash differs from lastHash, along with the new hash. A fetch
// failure is returned as-is with false and an empty hash.
func (s *TSDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", err
	}

	latest := s.generateHash(page)
	return latest != lastHash, latest, nil
}
// fetchPage performs a GET request for url with the configured User-Agent
// and returns the whole response body as a string. A status other than
// 200 OK produces an "HTTP <code>" error.
func (s *TSDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	request.Header.Set("User-Agent", s.config.UserAgent)

	response, err := s.client.Do(request)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", response.StatusCode)
	}

	content, err := io.ReadAll(response.Body)
	if err != nil {
		return "", err
	}
	return string(content), nil
}
// generateHash returns the hex-encoded SHA-256 digest of content.
func (s *TSDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// moduleToDocument renders the module overview as markdown (a heading with
// the module name followed by its description) and wraps it in a "ts-module"
// Document.
func (s *TSDocsScraper) moduleToDocument(module *tsdocs.Module, sourceName string) *Document {
	var md strings.Builder
	md.WriteString("# " + module.Name + "\n\n")
	fmt.Fprintf(&md, "%s\n", module.Doc)
	body := md.String()

	return &Document{
		ID:      generateDocID(module.DocURL),
		Source:  sourceName,
		Type:    "ts-module",
		Title:   module.Name,
		Content: body,
		URL:     module.DocURL,
		Metadata: map[string]interface{}{
			"module":   module.Name,
			"version":  module.Version,
			"doc_url":  module.DocURL,
			"doc_type": "ts-module",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// interfaceToDocument renders an interface as markdown (heading, optional
// description and, when it has properties, a "Properties" list of
// `name: type` entries) and wraps it in a "ts-interface" Document.
func (s *TSDocsScraper) interfaceToDocument(iface *tsdocs.Interface, module *tsdocs.Module, sourceName string) *Document {
	var md strings.Builder
	md.WriteString("# " + iface.Name + " (interface)\n\n")
	if iface.Doc != "" {
		fmt.Fprintf(&md, "%s\n\n", iface.Doc)
	}
	if len(iface.Properties) != 0 {
		md.WriteString("## Properties\n")
		for _, prop := range iface.Properties {
			fmt.Fprintf(&md, "- `%s: %s`\n", prop.Name, prop.Type)
		}
	}
	body := md.String()

	return &Document{
		ID:      generateDocID(iface.DocURL),
		Source:  sourceName,
		Type:    "ts-interface",
		Title:   iface.Name,
		Content: body,
		URL:     iface.DocURL,
		Metadata: map[string]interface{}{
			"module":  module.Name,
			"name":    iface.Name,
			"doc_url": iface.DocURL,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// functionToDocument renders a function as markdown (heading with call
// parentheses, optional fenced signature, optional description) and wraps it
// in a "ts-function" Document. The return type is recorded in the metadata.
func (s *TSDocsScraper) functionToDocument(fn *tsdocs.Function, module *tsdocs.Module, sourceName string) *Document {
	var md strings.Builder
	md.WriteString("# " + fn.Name + "()\n\n")
	if sig := fn.Signature; sig != "" {
		fmt.Fprintf(&md, "```typescript\n%s\n```\n\n", sig)
	}
	if fn.Doc != "" {
		fmt.Fprintf(&md, "%s\n", fn.Doc)
	}
	body := md.String()

	return &Document{
		ID:      generateDocID(fn.DocURL),
		Source:  sourceName,
		Type:    "ts-function",
		Title:   fn.Name,
		Content: body,
		URL:     fn.DocURL,
		Metadata: map[string]interface{}{
			"module":      module.Name,
			"name":        fn.Name,
			"return_type": fn.ReturnType,
			"doc_url":     fn.DocURL,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// classToDocument renders a class as markdown (heading, optional description
// and, when it has methods, a "Methods" list of method names) and wraps it
// in a "ts-class" Document.
func (s *TSDocsScraper) classToDocument(class *tsdocs.Class, module *tsdocs.Module, sourceName string) *Document {
	var md strings.Builder
	md.WriteString("# " + class.Name + " (class)\n\n")
	if class.Doc != "" {
		fmt.Fprintf(&md, "%s\n\n", class.Doc)
	}
	if len(class.Methods) != 0 {
		md.WriteString("## Methods\n")
		for _, method := range class.Methods {
			fmt.Fprintf(&md, "- `%s()`\n", method.Name)
		}
	}
	body := md.String()

	return &Document{
		ID:      generateDocID(class.DocURL),
		Source:  sourceName,
		Type:    "ts-class",
		Title:   class.Name,
		Content: body,
		URL:     class.DocURL,
		Metadata: map[string]interface{}{
			"module":  module.Name,
			"name":    class.Name,
			"doc_url": class.DocURL,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// typeAliasToDocument renders a type alias as markdown (heading, a fenced
// "type Name = ..." declaration, optional description) and wraps it in a
// "ts-type" Document.
func (s *TSDocsScraper) typeAliasToDocument(ta *tsdocs.TypeAlias, module *tsdocs.Module, sourceName string) *Document {
	var md strings.Builder
	md.WriteString("# " + ta.Name + " (type)\n\n")
	fmt.Fprintf(&md, "```typescript\ntype %s = %s\n```\n\n", ta.Name, ta.Type)
	if ta.Doc != "" {
		fmt.Fprintf(&md, "%s\n", ta.Doc)
	}
	body := md.String()

	return &Document{
		ID:      generateDocID(ta.DocURL),
		Source:  sourceName,
		Type:    "ts-type",
		Title:   ta.Name,
		Content: body,
		URL:     ta.DocURL,
		Metadata: map[string]interface{}{
			"module":  module.Name,
			"name":    ta.Name,
			"doc_url": ta.DocURL,
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
+244
View File
@@ -0,0 +1,244 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/yourorg/devour/pkg/vuedocs"
)
// VueDocsScraper fetches a Vue reference page over HTTP and converts the
// parsed reference (composition entries, directives, components and global
// API entries) into Documents.
type VueDocsScraper struct {
// config supplies the request User-Agent and the HTTP client timeout.
config *Config
// parser turns the fetched HTML into a vuedocs reference.
parser *vuedocs.Parser
// client performs the page downloads in fetchPage.
client *http.Client
}
// NewVueDocsScraper returns a VueDocsScraper bound to config, with a new
// vuedocs parser and an HTTP client that times out after config.Timeout.
func NewVueDocsScraper(config *Config) *VueDocsScraper {
	scraper := &VueDocsScraper{
		config: config,
		parser: vuedocs.NewParser(),
	}
	scraper.client = &http.Client{Timeout: config.Timeout}
	return scraper
}
// Scrape downloads the page at source.URL, parses it as a Vue reference page
// and returns one Document for the reference followed by one per composition
// entry, directive, component and global API entry, in that order.
//
// An error is returned when source.URL is empty, when the download fails, or
// when the parser rejects the page.
func (s *VueDocsScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source.URL == "" {
		return nil, fmt.Errorf("URL is required for Vue docs scraper")
	}

	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}

	reference, err := s.parser.ParseReferencePage(page, source.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse reference: %w", err)
	}

	name := source.Name
	// The reference overview always comes first.
	result := []*Document{s.referenceToDocument(reference, name)}
	for _, entry := range reference.Composition {
		result = append(result, s.compositionToDocument(entry, name))
	}
	for _, directive := range reference.Directives {
		result = append(result, s.directiveToDocument(directive, name))
	}
	for _, component := range reference.Components {
		result = append(result, s.componentToDocument(component, name))
	}
	for _, global := range reference.GlobalAPI {
		result = append(result, s.globalAPIToDocument(global, name))
	}
	return result, nil
}
// DetectChanges fetches source.URL, hashes the raw page body and reports
// whether that hash differs from lastHash. The freshly computed hash is
// returned so the caller can store it for the next comparison.
func (s *VueDocsScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	page, err := s.fetchPage(ctx, source.URL)
	if err != nil {
		return false, "", err
	}

	current := s.generateHash(page)
	return current != lastHash, current, nil
}
// fetchPage issues a GET for url with the configured User-Agent and returns
// the response body as a string. Any status other than 200 OK is reported as
// an "HTTP <code>" error. The request is bound to ctx.
func (s *VueDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
	request, err := http.NewRequestWithContext(ctx, "GET", url, nil)
	if err != nil {
		return "", err
	}
	request.Header.Set("User-Agent", s.config.UserAgent)

	response, err := s.client.Do(request)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	if status := response.StatusCode; status != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", status)
	}

	payload, err := io.ReadAll(response.Body)
	if err != nil {
		return "", err
	}
	return string(payload), nil
}
// generateHash returns the lowercase hex encoding of the SHA-256 digest of
// content.
func (s *VueDocsScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash.Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// referenceToDocument builds the summary Document for a parsed Vue reference
// page. The body is a fixed heading followed by one line with the number of
// entries in each category that Scrape turns into documents: composition
// API, directives, components and global API. ref.DocURL serves as the
// document URL and as the input to generateDocID.
func (s *VueDocsScraper) referenceToDocument(ref *vuedocs.Reference, sourceName string) *Document {
	var content strings.Builder
	fmt.Fprintf(&content, "# Vue API Reference\n\n")
	// Include the Global API count as well: Scrape emits documents for that
	// category too, so leaving it out made the summary incomplete.
	fmt.Fprintf(&content, "Composition API: %d, Directives: %d, Components: %d, Global API: %d\n",
		len(ref.Composition), len(ref.Directives), len(ref.Components), len(ref.GlobalAPI))

	body := content.String()
	return &Document{
		ID:        generateDocID(ref.DocURL),
		Source:    sourceName,
		Type:      "vue-reference",
		Title:     "Vue API Reference",
		Content:   body,
		URL:       ref.DocURL,
		Metadata:  map[string]interface{}{"doc_type": "vue-reference"},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// compositionToDocument renders one composition API entry as Markdown: a
// heading with the entry name, a fenced JavaScript signature when one is
// present, and the doc text. comp.DocURL serves as the document URL and as
// the input to generateDocID.
func (s *VueDocsScraper) compositionToDocument(comp *vuedocs.Composition, sourceName string) *Document {
	var sb strings.Builder

	fmt.Fprintf(&sb, "# %s\n\n", comp.Name)
	if comp.Signature != "" {
		fmt.Fprintf(&sb, "```javascript\n%s\n```\n\n", comp.Signature)
	}
	fmt.Fprintf(&sb, "%s\n", comp.Doc)

	body := sb.String()
	return &Document{
		ID:      generateDocID(comp.DocURL),
		Source:  sourceName,
		Type:    "vue-composition",
		Title:   comp.Name,
		Content: body,
		URL:     comp.DocURL,
		Metadata: map[string]interface{}{
			"name":     comp.Name,
			"kind":     comp.Kind,
			"doc_url":  comp.DocURL,
			"doc_type": "vue-composition",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// directiveToDocument renders one directive as Markdown: a heading with the
// directive name followed by its doc text. dir.DocURL serves as the document
// URL and as the input to generateDocID.
func (s *VueDocsScraper) directiveToDocument(dir *vuedocs.Directive, sourceName string) *Document {
	var sb strings.Builder

	fmt.Fprintf(&sb, "# %s\n\n", dir.Name)
	fmt.Fprintf(&sb, "%s\n", dir.Doc)

	body := sb.String()
	return &Document{
		ID:      generateDocID(dir.DocURL),
		Source:  sourceName,
		Type:    "vue-directive",
		Title:   dir.Name,
		Content: body,
		URL:     dir.DocURL,
		Metadata: map[string]interface{}{
			"name":     dir.Name,
			"doc_url":  dir.DocURL,
			"doc_type": "vue-directive",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// componentToDocument renders one built-in component as Markdown: a heading
// in "<Name />" form, the doc text, and a "Props" bullet list (name and type
// per prop) when the component declares any. comp.DocURL serves as the
// document URL and as the input to generateDocID.
func (s *VueDocsScraper) componentToDocument(comp *vuedocs.Component, sourceName string) *Document {
	var sb strings.Builder

	fmt.Fprintf(&sb, "# <%s />\n\n", comp.Name)
	fmt.Fprintf(&sb, "%s\n", comp.Doc)
	if len(comp.Props) > 0 {
		fmt.Fprintf(&sb, "\n## Props\n")
		for _, prop := range comp.Props {
			fmt.Fprintf(&sb, "- `%s: %s`\n", prop.Name, prop.Type)
		}
	}

	body := sb.String()
	return &Document{
		ID:      generateDocID(comp.DocURL),
		Source:  sourceName,
		Type:    "vue-component",
		Title:   comp.Name,
		Content: body,
		URL:     comp.DocURL,
		Metadata: map[string]interface{}{
			"name":     comp.Name,
			"doc_url":  comp.DocURL,
			"doc_type": "vue-component",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
// globalAPIToDocument renders one global API entry as Markdown: a heading
// with the entry name, a fenced JavaScript signature when one is present,
// and the doc text. api.DocURL serves as the document URL and as the input
// to generateDocID.
func (s *VueDocsScraper) globalAPIToDocument(api *vuedocs.API, sourceName string) *Document {
	var sb strings.Builder

	fmt.Fprintf(&sb, "# %s\n\n", api.Name)
	if api.Signature != "" {
		fmt.Fprintf(&sb, "```javascript\n%s\n```\n\n", api.Signature)
	}
	fmt.Fprintf(&sb, "%s\n", api.Doc)

	body := sb.String()
	return &Document{
		ID:      generateDocID(api.DocURL),
		Source:  sourceName,
		Type:    "vue-api",
		Title:   api.Name,
		Content: body,
		URL:     api.DocURL,
		Metadata: map[string]interface{}{
			"name":     api.Name,
			"category": api.Category,
			"doc_url":  api.DocURL,
			"doc_type": "vue-api",
		},
		Hash:      s.generateHash(body),
		Timestamp: time.Now(),
	}
}
+296
View File
@@ -0,0 +1,296 @@
package scraper
import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"net/url"
	"regexp"
	"strings"
	"sync"
	"time"

	"github.com/gocolly/colly/v2"
)
// WebScraper scrapes documentation from web URLs.
// It crawls pages with a colly collector configured from config.
type WebScraper struct {
// config provides the crawl settings read by Scrape and DetectChanges
// (user agent, max depth, rate limit, concurrency, timeout, cache dir).
config *Config
}
// NewWebScraper returns a WebScraper that reads its crawl settings from
// config. The pointer is stored as-is, not copied.
func NewWebScraper(config *Config) *WebScraper {
	scraper := new(WebScraper)
	scraper.config = config
	return scraper
}
// Scrape crawls source.URL and the same-host pages reachable from it, and
// returns one Document for every page that passes the include/exclude
// patterns and yields at least 100 bytes of cleaned content.
//
// The collector runs asynchronously, so its callbacks may execute on several
// goroutines at once; the visited set and the result slice are therefore
// guarded by a mutex. Links are followed through the originating request
// (not the collector) so that colly tracks crawl depth and can enforce
// config.MaxDepth. Once ctx is cancelled, requests that have not started yet
// are aborted and ctx.Err() is returned after the crawl drains.
//
// It returns an error when source.URL cannot be parsed, when the rate-limit
// rule is rejected, when the first visit cannot be started, or when ctx is
// cancelled.
func (s *WebScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if ctx == nil {
		ctx = context.Background()
	}

	// Parse base URL for domain restrictions.
	baseURL, err := url.Parse(source.URL)
	if err != nil {
		return nil, fmt.Errorf("invalid URL: %w", err)
	}

	var (
		mu        sync.Mutex // guards documents and visited
		documents []*Document
		visited   = make(map[string]bool)
	)

	// markVisited records pageURL and reports whether this was the first
	// time it was seen; check and set happen under one lock acquisition.
	markVisited := func(pageURL string) bool {
		mu.Lock()
		defer mu.Unlock()
		if visited[pageURL] {
			return false
		}
		visited[pageURL] = true
		return true
	}
	// wasVisited reports whether pageURL has already been processed.
	wasVisited := func(pageURL string) bool {
		mu.Lock()
		defer mu.Unlock()
		return visited[pageURL]
	}

	// Create Colly collector.
	c := colly.NewCollector(
		colly.AllowedDomains(baseURL.Host),
		colly.MaxDepth(s.config.MaxDepth),
		colly.Async(true),
		colly.UserAgent(s.config.UserAgent),
	)

	// Set rate limiting.
	if s.config.RateLimit > 0 {
		rule := &colly.LimitRule{
			DomainGlob:  "*",
			Parallelism: s.config.Concurrency,
			Delay:       s.config.RateLimit,
		}
		if err := c.Limit(rule); err != nil {
			return nil, fmt.Errorf("invalid rate limit rule: %w", err)
		}
	}

	// Set timeout; a zero value keeps colly's default.
	if s.config.Timeout > 0 {
		c.SetRequestTimeout(s.config.Timeout)
	}

	// Enable caching if cache directory is set.
	if s.config.CacheDir != "" {
		c.CacheDir = s.config.CacheDir
	}

	// Stop issuing new requests once the caller has cancelled.
	c.OnRequest(func(r *colly.Request) {
		if ctx.Err() != nil {
			r.Abort()
		}
	})

	// Handle errors.
	c.OnError(func(r *colly.Response, err error) {
		fmt.Printf("Error scraping %s: %v\n", r.Request.URL, err)
	})

	// Extract content from pages.
	c.OnHTML("html", func(e *colly.HTMLElement) {
		pageURL := e.Request.URL.String()

		// Skip if already visited.
		if !markVisited(pageURL) {
			return
		}

		// Check include/exclude patterns.
		if !s.shouldInclude(pageURL, source.Include, source.Exclude) {
			return
		}

		// Extract title, falling back to the first-level heading.
		title := e.ChildText("title")
		if title == "" {
			title = e.ChildText("h1")
		}

		// Extract main content; skip pages with too little of it.
		content := s.extractContent(e)
		if len(content) < 100 {
			return
		}

		doc := &Document{
			ID:      generateDocID(pageURL),
			Source:  source.Name,
			Type:    "html",
			Title:   strings.TrimSpace(title),
			Content: content,
			URL:     pageURL,
			Metadata: map[string]interface{}{
				"headings":    s.extractHeadings(e),
				"links":       s.extractLinks(e),
				"images":      s.extractImages(e),
				"description": e.ChildAttr(`meta[name="description"]`, "content"),
			},
			Hash:      s.generateHash(content),
			Timestamp: time.Now(),
		}

		mu.Lock()
		documents = append(documents, doc)
		mu.Unlock()
	})

	// Follow links.
	c.OnHTML("a[href]", func(e *colly.HTMLElement) {
		absoluteURL := e.Request.AbsoluteURL(e.Attr("href"))
		if absoluteURL == "" || wasVisited(absoluteURL) {
			return
		}

		// Check include/exclude patterns.
		if !s.shouldInclude(absoluteURL, source.Include, source.Exclude) {
			return
		}

		// Visit via the request so the new request inherits depth+1.
		// Errors here (already visited, disallowed domain, max depth) are
		// expected during a crawl and intentionally ignored.
		_ = e.Request.Visit(absoluteURL)
	})

	// Start scraping.
	if err := c.Visit(source.URL); err != nil {
		return nil, fmt.Errorf("failed to start scraping: %w", err)
	}

	// Wait for async scraping to complete.
	c.Wait()

	if err := ctx.Err(); err != nil {
		return nil, err
	}
	return documents, nil
}
// DetectChanges fetches only source.URL, hashes its extracted main content
// and reports whether that hash differs from lastHash. The new hash is
// returned so the caller can store it for the next comparison.
//
// If the response contains no <html> element the content is empty and the
// hash is that of the empty string.
func (s *WebScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	// Quick check by fetching just the main page.
	c := colly.NewCollector(
		colly.UserAgent(s.config.UserAgent),
	)
	// Only override the timeout when one is configured, as Scrape does;
	// passing zero would remove colly's default request timeout.
	if s.config.Timeout > 0 {
		c.SetRequestTimeout(s.config.Timeout)
	}

	var content string
	c.OnHTML("html", func(e *colly.HTMLElement) {
		content = s.extractContent(e)
	})

	if err := c.Visit(source.URL); err != nil {
		return false, "", err
	}

	currentHash := s.generateHash(content)
	return currentHash != lastHash, currentHash, nil
}
// extractContent returns the whitespace-normalised main text of a page.
//
// It tries a list of common content selectors in order and stops at the
// first one whose text is longer than 200 bytes. If none reaches that size,
// the longest candidate seen is used (previously the text of the last
// selector won, which could discard a better earlier match). When no
// selector yields any text, the whole <body> is used.
func (s *WebScraper) extractContent(e *colly.HTMLElement) string {
	// Common content selectors, most specific first.
	selectors := []string{
		"article",
		"main",
		".content",
		".documentation",
		".docs",
		".markdown-body",
		"[role='main']",
		"#content",
		"#main",
	}

	var best string
	for _, selector := range selectors {
		if candidate := e.ChildText(selector); len(candidate) > len(best) {
			best = candidate
		}
		if len(best) > 200 {
			break
		}
	}

	// Fallback to body if no content found.
	if best == "" {
		best = e.ChildText("body")
	}

	return cleanText(best)
}
// extractHeadings returns the trimmed text of every h1-h6 element under e,
// in the order ForEach reports them. Headings that are empty after trimming
// are omitted.
func (s *WebScraper) extractHeadings(e *colly.HTMLElement) []string {
	var found []string
	e.ForEach("h1, h2, h3, h4, h5, h6", func(_ int, node *colly.HTMLElement) {
		if label := strings.TrimSpace(node.Text); label != "" {
			found = append(found, label)
		}
	})
	return found
}
// extractLinks returns the distinct href values of the anchors under e, in
// first-seen order. Empty hrefs and pure fragment links (starting with "#")
// are skipped; hrefs are returned exactly as written in the page.
func (s *WebScraper) extractLinks(e *colly.HTMLElement) []string {
	var (
		result []string
		known  = make(map[string]bool)
	)
	e.ForEach("a[href]", func(_ int, anchor *colly.HTMLElement) {
		target := anchor.Attr("href")
		if target == "" || known[target] || strings.HasPrefix(target, "#") {
			return
		}
		result = append(result, target)
		known[target] = true
	})
	return result
}
// extractImages returns the non-empty src attribute of every <img> under e,
// in the order ForEach reports them. Duplicates are kept.
func (s *WebScraper) extractImages(e *colly.HTMLElement) []string {
	var sources []string
	e.ForEach("img[src]", func(_ int, node *colly.HTMLElement) {
		location := node.Attr("src")
		if location == "" {
			return
		}
		sources = append(sources, location)
	})
	return sources
}
// shouldInclude decides whether urlStr passes the source filters.
//
// Patterns are regular expressions. A URL matching any exclude pattern is
// rejected. Otherwise it is accepted when include is empty or when it
// matches at least one include pattern. A pattern that fails to compile is
// treated as not matching.
func (s *WebScraper) shouldInclude(urlStr string, include, exclude []string) bool {
	// matchesAny reports whether urlStr matches at least one pattern.
	matchesAny := func(patterns []string) bool {
		for _, expr := range patterns {
			// The compile error is ignored on purpose: MatchString reports
			// false alongside it, which is the "no match" we want.
			if hit, _ := regexp.MatchString(expr, urlStr); hit {
				return true
			}
		}
		return false
	}

	// Excludes take precedence over includes.
	if matchesAny(exclude) {
		return false
	}
	return len(include) == 0 || matchesAny(include)
}
// generateHash returns the lowercase hex encoding of the SHA-256 digest of
// content.
func (s *WebScraper) generateHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash.Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// cleanTextWhitespace matches one or more consecutive whitespace characters.
// It is compiled once at package initialisation instead of on every
// cleanText call.
var cleanTextWhitespace = regexp.MustCompile(`\s+`)

// cleanText collapses every run of whitespace in text into a single space
// and trims leading and trailing whitespace.
func cleanText(text string) string {
	return strings.TrimSpace(cleanTextWhitespace.ReplaceAllString(text, " "))
}
// generateDocID derives a document ID from urlStr: the first 12 bytes of its
// SHA-256 digest, hex-encoded (24 characters). The same URL always yields
// the same ID.
func generateDocID(urlStr string) string {
	hasher := sha256.New()
	hasher.Write([]byte(urlStr)) // hash.Hash.Write never returns an error
	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest[:12])
}
+94
View File
@@ -0,0 +1,94 @@
// Package server provides MCP server implementation.
package server
import (
"context"
)
// Config holds server configuration.
// Each field carries a YAML tag naming its configuration key.
type Config struct {
// Mode selects the implementation returned by NewServer: "remote" yields an
// HTTP server, any other value a stdio server.
Mode string `yaml:"mode"`
// Transport is not read by any code in this file.
// NOTE(review): presumably names the wire transport — confirm against callers.
Transport string `yaml:"transport"`
// Host is not read by any code in this file.
// NOTE(review): presumably the HTTP listen host — confirm once Start is implemented.
Host string `yaml:"host"`
// Port is not read by any code in this file.
// NOTE(review): presumably the HTTP listen port — confirm once Start is implemented.
Port int `yaml:"port"`
}
// Server defines the MCP server interface.
// It is implemented in this file by HTTPServer and StdioServer.
type Server interface {
// Start begins listening for connections.
// It receives a context and returns an error on failure.
Start(ctx context.Context) error
// Stop gracefully shuts down the server.
// It receives a context and returns an error on failure.
Stop(ctx context.Context) error
}
// QueryRequest represents a search query.
// It is serialised as JSON; Limit and Threshold are omitted when zero.
type QueryRequest struct {
// Query is the search text.
Query string `json:"query"`
// Limit is optional. NOTE(review): presumably the maximum number of results — confirm against the handler.
Limit int `json:"limit,omitempty"`
// Threshold is optional. NOTE(review): presumably a minimum score cut-off — confirm against the handler.
Threshold float64 `json:"threshold,omitempty"`
}
// QueryResponse represents search results.
// It is serialised as JSON using the keys in the struct tags.
type QueryResponse struct {
// Query is serialised under "query". NOTE(review): presumably echoes the request text — confirm.
Query string `json:"query"`
// Results holds the individual matches.
Results []Result `json:"results"`
// Total is serialised under "total". NOTE(review): presumably the match count — confirm whether it can exceed len(Results).
Total int `json:"total"`
// TookMs is serialised under "took_ms". NOTE(review): presumably elapsed time in milliseconds — confirm.
TookMs int64 `json:"took_ms"`
}
// Result represents a single search result.
// It is serialised as JSON; Metadata is omitted when empty.
type Result struct {
// ID is serialised under "id". NOTE(review): presumably identifies the matched chunk, as distinct from DocumentID — confirm.
ID string `json:"id"`
// DocumentID is serialised under "document_id".
DocumentID string `json:"document_id"`
// Content is the matched text.
Content string `json:"content"`
// Score is serialised under "score". NOTE(review): presumably a similarity score where higher is better — confirm.
Score float64 `json:"score"`
// Source is serialised under "source". NOTE(review): presumably the originating source name — confirm.
Source string `json:"source"`
// Metadata carries arbitrary extra key/value pairs.
Metadata map[string]any `json:"metadata,omitempty"`
}
// NewServer returns the Server implementation selected by config.Mode: an
// HTTPServer for "remote", a StdioServer for every other value.
func NewServer(config *Config) Server {
	switch config.Mode {
	case "remote":
		return NewHTTPServer(config)
	default:
		return NewStdioServer(config)
	}
}
// NewHTTPServer returns an HTTPServer that keeps a reference to config.
func NewHTTPServer(config *Config) *HTTPServer {
	srv := new(HTTPServer)
	srv.config = config
	return srv
}
// NewStdioServer returns a StdioServer that keeps a reference to config.
func NewStdioServer(config *Config) *StdioServer {
	srv := new(StdioServer)
	srv.config = config
	return srv
}
// HTTPServer implements Server for HTTP transport.
// It is currently a stub: Start and Stop do no work and always return nil.
type HTTPServer struct {
// config is the configuration passed to NewHTTPServer.
config *Config
}
// Start is a placeholder: it returns nil immediately without listening.
// NOTE(review): callers cannot tell from the nil result that nothing was started.
func (s *HTTPServer) Start(ctx context.Context) error {
// TODO: Implement HTTP server with MCP endpoints
return nil
}
// Stop is a no-op that always returns nil.
func (s *HTTPServer) Stop(ctx context.Context) error {
return nil
}
// StdioServer implements Server for stdio transport.
// It is currently a stub: Start and Stop do no work and always return nil.
type StdioServer struct {
// config is the configuration passed to NewStdioServer.
config *Config
}
// Start is a placeholder: it returns nil immediately without reading input.
// NOTE(review): callers cannot tell from the nil result that nothing was started.
func (s *StdioServer) Start(ctx context.Context) error {
// TODO: Implement stdio JSON-RPC server
return nil
}
// Stop is a no-op that always returns nil.
func (s *StdioServer) Stop(ctx context.Context) error {
return nil
}
+206
View File
@@ -0,0 +1,206 @@
package ui
import (
"fmt"
"os"
"strings"
)
// Version information (set via ldflags at build time).
// The values below are the defaults used when nothing is injected.
var (
// Version is the release version; it is passed to the banner functions.
Version = "dev"
// BuildDate is shown by PrintVersionInfo.
BuildDate = "unknown"
// GitCommit is shown by PrintVersionInfo.
GitCommit = "unknown"
)
// ANSI color codes for terminal output.
// Each constant is an escape sequence to be written before the text it
// styles; ColorReset ends the styling.
const (
// ColorReset clears all active attributes.
ColorReset = "\033[0m"
// ColorCyan selects the cyan foreground (code 36).
ColorCyan = "\033[36m"
// ColorTeal selects 256-colour palette entry 37 as foreground.
ColorTeal = "\033[38;5;37m"
// ColorBold enables bold (code 1).
ColorBold = "\033[1m"
// ColorDim enables dim/faint (code 2).
ColorDim = "\033[2m"
// ColorItalic enables italic (code 3).
ColorItalic = "\033[3m"
)
// IsColorSupported reports whether ANSI colour codes should be written to
// standard output.
//
// It returns true only when all of the following hold: stdout can be
// stat'ed and is a character device, the NO_COLOR environment variable is
// unset or empty, and TERM is set to something other than "dumb".
func IsColorSupported() bool {
	// A failed Stat yields a nil FileInfo; treat that as "no colour" instead
	// of dereferencing it.
	info, err := os.Stdout.Stat()
	if err != nil {
		return false
	}
	// Pipes and regular files are not character devices.
	if info.Mode()&os.ModeCharDevice == 0 {
		return false
	}
	// Any non-empty NO_COLOR value disables colour.
	if os.Getenv("NO_COLOR") != "" {
		return false
	}
	term := os.Getenv("TERM")
	return term != "" && term != "dumb"
}
// Colorize wraps text in colorCode and ColorReset when IsColorSupported
// reports true; otherwise it returns text unchanged.
func Colorize(text, colorCode string) string {
	if !IsColorSupported() {
		return text
	}
	return colorCode + text + ColorReset
}
// PrintHeader prints title between two "=" border lines on standard output,
// in bold cyan when colour is supported.
//
// The border is sized from the number of characters (runes) in title rather
// than its byte length, so non-ASCII titles get a border of matching width.
func PrintHeader(title string) {
	border := strings.Repeat("=", len([]rune(title))+4)

	if !IsColorSupported() {
		fmt.Println(border)
		fmt.Printf(" %s \n", title)
		fmt.Println(border)
		return
	}

	fmt.Printf("%s%s%s\n", ColorCyan, border, ColorReset)
	fmt.Printf("%s %s %s\n", ColorCyan+ColorBold, title, ColorReset)
	fmt.Printf("%s%s%s\n", ColorCyan, border, ColorReset)
}
// PrintSection prints a blank line, a 40-dash rule, title, and a second
// rule on standard output. With colour support the rules are teal and the
// title bold teal.
func PrintSection(title string) {
	rule := strings.Repeat("-", 40)

	if !IsColorSupported() {
		fmt.Printf("\n%s\n", rule)
		fmt.Println(title)
		fmt.Printf("%s\n", rule)
		return
	}

	fmt.Printf("\n%s%s%s\n", ColorTeal, rule, ColorReset)
	fmt.Printf("%s%s%s\n", ColorTeal+ColorBold, title, ColorReset)
	fmt.Printf("%s%s%s\n", ColorTeal, rule, ColorReset)
}
// PrintKeyValue prints "key: value" on one line of standard output. The key
// (with its trailing colon) is left-aligned in a 20-column field and shown
// in cyan when colour is supported.
func PrintKeyValue(key, value string) {
	label := key + ":"
	if !IsColorSupported() {
		fmt.Printf(" %-20s %s\n", label, value)
		return
	}
	fmt.Printf(" %s%-20s%s %s\n", ColorCyan, label, ColorReset, value)
}
// PrintSuccess prints message on standard output prefixed with "SUCCESS:";
// the prefix is cyan when colour is supported.
func PrintSuccess(message string) {
	if !IsColorSupported() {
		fmt.Printf("SUCCESS: %s\n", message)
		return
	}
	fmt.Printf("%s%s%s %s\n", ColorCyan, "SUCCESS:", ColorReset, message)
}
// PrintError prints an error message.
// The message is written to standard error, prefixed with "ERROR: " and
// followed by a newline. No colour is applied.
func PrintError(message string) {
fmt.Fprintf(os.Stderr, "ERROR: %s\n", message)
}
// PrintWarning prints a warning message.
// The message is written to standard output (not standard error, unlike
// PrintError), prefixed with "WARNING: " and followed by a newline. No
// colour is applied.
func PrintWarning(message string) {
fmt.Printf("WARNING: %s\n", message)
}
// PrintInfo prints message on its own line of standard output, dimmed when
// colour is supported.
func PrintInfo(message string) {
	if !IsColorSupported() {
		fmt.Println(message)
		return
	}
	fmt.Printf("%s%s%s\n", ColorDim, message, ColorReset)
}
// PrintVersionInfo prints detailed version information.
// It writes a "Devour Version Info" header, then the Version, BuildDate and
// GitCommit package variables as key/value lines, then a blank line.
func PrintVersionInfo() {
PrintHeader("Devour Version Info")
PrintKeyValue("Version", Version)
PrintKeyValue("Build Date", BuildDate)
PrintKeyValue("Git Commit", GitCommit)
fmt.Println()
}
// PrintHelpBanner prints the help banner with character.
// It delegates to PrintBanner with the package-level Version.
func PrintHelpBanner() {
PrintBanner(Version)
}
// PrintStartupBanner prints the startup banner for interactive sessions.
// It delegates to PrintWelcome with the package-level Version.
func PrintStartupBanner() {
PrintWelcome(Version)
}
// BoxStyle defines a box drawing style.
// PrintInBox uses the four corner strings once each, repeats Horizontal
// along the top and bottom edges, and puts Vertical at both ends of every
// content line.
type BoxStyle struct {
// TopLeft is the top-left corner.
TopLeft string
// TopRight is the top-right corner.
TopRight string
// BottomLeft is the bottom-left corner.
BottomLeft string
// BottomRight is the bottom-right corner.
BottomRight string
// Horizontal is repeated to form the top and bottom edges.
Horizontal string
// Vertical forms the left and right edges.
Vertical string
}
// DefaultBoxStyle is the default box style, drawn with single-line Unicode
// box-drawing characters. (Every glyph was previously a space, which made
// the box invisible.)
var DefaultBoxStyle = BoxStyle{
	TopLeft:     "┌",
	TopRight:    "┐",
	BottomLeft:  "└",
	BottomRight: "┘",
	Horizontal:  "─",
	Vertical:    "│",
}
// DoubleBoxStyle uses double-line Unicode box-drawing characters. (Every
// glyph was previously a space, which made it invisible and identical to
// DefaultBoxStyle.)
var DoubleBoxStyle = BoxStyle{
	TopLeft:     "╔",
	TopRight:    "╗",
	BottomLeft:  "╚",
	BottomRight: "╝",
	Horizontal:  "═",
	Vertical:    "║",
}
// PrintInBox prints text on standard output surrounded by a box drawn with
// style. text is split on newlines; every line is padded with spaces to the
// width of the longest line and framed with one space of inner margin.
//
// Widths are measured in characters (runes) rather than bytes so that
// non-ASCII text stays aligned with the border.
func PrintInBox(text string, style BoxStyle) {
	rows := strings.Split(text, "\n")

	// Measure each row once, in runes, and remember the widest.
	widths := make([]int, len(rows))
	maxWidth := 0
	for i, row := range rows {
		widths[i] = len([]rune(row))
		if widths[i] > maxWidth {
			maxWidth = widths[i]
		}
	}

	edge := strings.Repeat(style.Horizontal, maxWidth+2)

	// Top border
	fmt.Printf("%s%s%s\n", style.TopLeft, edge, style.TopRight)

	// Content
	for i, row := range rows {
		fill := strings.Repeat(" ", maxWidth-widths[i])
		fmt.Printf("%s %s%s %s\n", style.Vertical, row, fill, style.Vertical)
	}

	// Bottom border
	fmt.Printf("%s%s%s\n", style.BottomLeft, edge, style.BottomRight)
}
// PrintDivider prints a 50-character horizontal rule on standard output,
// dimmed when colour is supported. The rule is drawn with the box-drawing
// character "─"; it previously consisted of spaces and was invisible.
func PrintDivider() {
	line := strings.Repeat("─", 50)
	if !IsColorSupported() {
		fmt.Println(line)
		return
	}
	fmt.Printf("%s%s%s\n", ColorDim, line, ColorReset)
}
+179
View File
@@ -0,0 +1,179 @@
package ui
import (
"fmt"
"strings"
)
// DevourCharacter is the full-size ASCII art of the Devour mascot, followed
// by the caption "Devour - Documentation Devourer". The text contains no
// colour codes; PrintWithColor and GetColoredCharacter add cyan around it.
const DevourCharacter = `
___
.-' '-.
/ \
| O O |
| __ |
\ ' ' /
'-.___.-'
___ / \ ___
_.-' '/ \' '-._
_.-' | | '-._
/ \ ___ / \
| __ \ / \ / __ |
| .' '. .---. | | .---. .' '. |
| | | | | | | | | | | |
\ \__/ / | | | | | \__/ /
'.____.' | | | | '.____.'
| | | |
| | | |
/ | | \
/ | | \
/ |_____| \
/ / \ \
/ / \ \
/ / \ \
/_____/ \_____\
Devour - Documentation Devourer
`
// DevourCharacterSmall is a smaller version for compact displays.
// PrintStatusWithCharacter splits it on newlines to place status text
// beside each line.
const DevourCharacterSmall = `
___
.-' '-.
/ O O \
| _ |
\ '-' /
'-.___.-'
Documentation Devourer
`
// DevourLogo is a stylized text logo followed by the tagline
// "Documentation Devourer". It contains no mascot and no colour codes.
const DevourLogo = `
____ _____ __________ _____ ____ _____
| _ \| ____| ___| ___|_ _| _ \| ____|
| | | | _| | |_ | |_ | | | | | | _|
| |_| | |___| _| | _| | | | |_| | |___
|____/|_____|_| |_| |_| |____/|_____|
Documentation Devourer
`
// DevourBanner combines the character with version info.
// It is a format string: the single %s verb on the last line is replaced by
// the version in PrintBanner and GetBanner.
const DevourBanner = `
___
.-' '-.
/ O O \
| _ |
\ '-' /
'-.___.-'
____ _____ __________ _____
| _ \| ____| ___| ___|_ _|
| | | | _| | |_ | |_ | |
| |_| | |___| _| | _| | |
|____/|_____|_| |_| |_|
Documentation Devourer v%s
`
// PrintCharacter prints the full ASCII character.
// It writes DevourCharacter followed by a newline to standard output.
func PrintCharacter() {
fmt.Println(DevourCharacter)
}
// PrintCharacterSmall prints the smaller character version.
// It writes DevourCharacterSmall followed by a newline to standard output.
func PrintCharacterSmall() {
fmt.Println(DevourCharacterSmall)
}
// PrintLogo prints just the text logo.
// It writes DevourLogo followed by a newline to standard output.
func PrintLogo() {
fmt.Println(DevourLogo)
}
// PrintBanner prints the character with version info.
// It formats DevourBanner with version and writes the result to standard
// output; no extra newline is appended beyond those in the banner.
func PrintBanner(version string) {
fmt.Printf(DevourBanner, version)
}
// GetCharacter returns the character string.
// The result is DevourCharacter unchanged, without colour codes.
func GetCharacter() string {
return DevourCharacter
}
// GetCharacterSmall returns the small character string.
// The result is DevourCharacterSmall unchanged.
func GetCharacterSmall() string {
return DevourCharacterSmall
}
// GetLogo returns the logo string.
// The result is DevourLogo unchanged.
func GetLogo() string {
return DevourLogo
}
// GetBanner returns the formatted banner string.
// It is DevourBanner with its %s verb replaced by version; nothing is
// printed.
func GetBanner(version string) string {
return fmt.Sprintf(DevourBanner, version)
}
// PrintWelcome prints a welcome message with the character.
// It prints the banner for version, then a blank line, the product tagline,
// and another blank line, all on standard output.
func PrintWelcome(version string) {
PrintBanner(version)
fmt.Println()
fmt.Println(" Context ingestion and management for AI")
fmt.Println()
}
// PrintWithColor prints the character with ANSI color codes (cyan/teal theme)
func PrintWithColor(useColor bool) {
if useColor {
// Cyan color for the character
cyan := "\033[36m"
reset := "\033[0m"
fmt.Printf("%s%s%s", cyan, DevourCharacter, reset)
} else {
PrintCharacter()
}
}
// GetColoredCharacter returns the full character. When useColor is true the
// art is wrapped in the cyan and reset escape codes; otherwise it is
// returned unchanged.
func GetColoredCharacter(useColor bool) string {
	if !useColor {
		return DevourCharacter
	}

	const (
		cyan  = "\033[36m"
		reset = "\033[0m"
	)
	return cyan + DevourCharacter + reset
}
// PrintStatusWithCharacter prints the small character with statusLines to
// its right, pairing art line i with status line i.
//
// Art lines are padded to the width of the widest art line so the status
// column lines up. If there are more status lines than art lines, the extra
// ones are printed behind blank padding; if there are fewer, the remaining
// art lines are printed on their own.
func PrintStatusWithCharacter(statusLines []string) {
	art := strings.Split(DevourCharacterSmall, "\n")

	// Width of the widest art line, in bytes.
	width := 0
	for _, row := range art {
		if n := len(row); n > width {
			width = n
		}
	}

	total := len(art)
	if len(statusLines) > total {
		total = len(statusLines)
	}

	for i := 0; i < total; i++ {
		hasArt, hasStatus := i < len(art), i < len(statusLines)
		switch {
		case hasArt && hasStatus:
			// Pad to align status
			gap := strings.Repeat(" ", width-len(art[i])+2)
			fmt.Printf("%s%s %s\n", art[i], gap, statusLines[i])
		case hasArt:
			fmt.Println(art[i])
		default:
			// Art exhausted: keep the status column aligned.
			fmt.Printf("%s %s\n", strings.Repeat(" ", width+2), statusLines[i])
		}
	}
}
+290
View File
@@ -0,0 +1,290 @@
// Package vector provides vector storage capabilities.
package vector
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"os"
"path/filepath"
"sync"
)
// Config holds vector store configuration.
// Each field carries a YAML tag naming its configuration key.
type Config struct {
// Type selects the backend in NewStore: "chromem" yields a ChromemStore,
// "memory" or any other value a MemoryStore.
Type string `yaml:"type"`
// PersistDir, when non-empty, is the directory MemoryStore creates and
// persists into after Add.
PersistDir string `yaml:"persist_dir"`
// SimilarityMetric is not read by any code in this file; MemoryStore.Search
// always uses cosine similarity.
SimilarityMetric string `yaml:"similarity_metric"`
}
// Document represents an indexed document.
type Document struct {
// ID identifies the document; MemoryStore.Add derives it from Content when
// it is empty.
ID string
// Content is the document text.
Content string
// Vector is the embedding compared during Search; documents with a nil
// Vector are skipped by MemoryStore.Search.
Vector []float32
// Metadata carries arbitrary extra key/value pairs and is copied into
// search results by reference.
Metadata map[string]any
}
// SearchResult represents a search result.
type SearchResult struct {
// ID is the ID of the matched document.
ID string
// Content is the text of the matched document.
Content string
// Score is the similarity between the query vector and the document
// vector; MemoryStore.Search fills it with cosine similarity.
Score float64
// Metadata is the matched document's metadata map (shared, not copied).
Metadata map[string]any
}
// scoredDoc pairs a document ID with its similarity score so that search
// candidates can be ordered by sortByScore.
type scoredDoc struct {
// id is the key of the document in MemoryStore.docs.
id string
// score is the similarity to the query vector.
score float64
}
// Store defines the interface for vector storage.
// It is implemented in this file by MemoryStore and by the ChromemStore
// stub, whose methods all return a "not implemented" error.
type Store interface {
// Add adds documents to the store.
Add(ctx context.Context, docs []*Document) error
// Search finds similar documents.
// It returns at most limit results for the query vector.
Search(ctx context.Context, vector []float32, limit int) ([]*SearchResult, error)
// Delete removes a document by ID.
Delete(ctx context.Context, id string) error
// Get retrieves a document by ID.
Get(ctx context.Context, id string) (*Document, error)
// Count returns the number of documents.
Count(ctx context.Context) (int, error)
// Clear removes all documents.
Clear(ctx context.Context) error
}
// NewStore returns the Store backend selected by config.Type: a
// ChromemStore for "chromem", and a MemoryStore for "memory" or any other
// value.
func NewStore(config *Config) Store {
	if config.Type == "chromem" {
		return NewChromemStore(config)
	}
	// "memory" and every unrecognised type share the in-memory backend.
	return NewMemoryStore(config)
}
// MemoryStore implements Store with in-memory storage.
// All methods take mu, so a MemoryStore must not be copied after first use.
type MemoryStore struct {
// config supplies PersistDir, which Add consults after inserting.
config *Config
// mu guards docs and vectors.
mu sync.RWMutex
// docs maps document ID to document.
docs map[string]*Document
// vectors collects document vectors; it is appended to by Add and rebuilt
// by Delete, but Search reads vectors from docs instead.
vectors [][]float32
}
// NewMemoryStore returns an empty MemoryStore that keeps a reference to
// config and has its document map initialised.
func NewMemoryStore(config *Config) *MemoryStore {
	store := new(MemoryStore)
	store.config = config
	store.docs = make(map[string]*Document)
	return store
}
// Add inserts docs into the store, replacing any stored document that has
// the same ID. A document with an empty ID is assigned one derived from its
// content (the ID is written back into the caller's Document). Nil entries
// in docs are skipped.
//
// When config.PersistDir is set the store is persisted afterwards and the
// error from that step is returned.
func (s *MemoryStore) Add(ctx context.Context, docs []*Document) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	// Tolerate a MemoryStore that was not built by NewMemoryStore.
	if s.docs == nil {
		s.docs = make(map[string]*Document)
	}

	replaced := false
	for _, doc := range docs {
		if doc == nil {
			continue
		}
		if doc.ID == "" {
			doc.ID = generateID(doc.Content)
		}
		if _, exists := s.docs[doc.ID]; exists {
			// The old vector is already in s.vectors; appending again would
			// leave a stale duplicate, so rebuild below instead.
			replaced = true
		} else {
			s.vectors = append(s.vectors, doc.Vector)
		}
		s.docs[doc.ID] = doc
	}

	// Keep vectors in step with docs after an overwrite, as Delete does.
	if replaced {
		s.vectors = nil
		for _, doc := range s.docs {
			s.vectors = append(s.vectors, doc.Vector)
		}
	}

	// Persist if directory is set
	if s.config != nil && s.config.PersistDir != "" {
		return s.persist()
	}

	return nil
}
// Search returns up to limit stored documents ordered by descending cosine
// similarity to vector. Documents without a vector are ignored. The order
// of documents with equal scores is unspecified.
//
// A limit of zero or less yields an empty, non-nil result instead of
// panicking, and the result slice is sized by the number of candidates
// rather than by limit, so a very large limit does not over-allocate.
func (s *MemoryStore) Search(ctx context.Context, vector []float32, limit int) ([]*SearchResult, error) {
	if limit <= 0 {
		return []*SearchResult{}, nil
	}

	s.mu.RLock()
	defer s.mu.RUnlock()

	scored := make([]*scoredDoc, 0, len(s.docs))
	for id, doc := range s.docs {
		if doc.Vector != nil {
			scored = append(scored, &scoredDoc{id: id, score: cosineSimilarity(vector, doc.Vector)})
		}
	}

	sortByScore(scored)

	// Never return (or allocate for) more results than there are candidates.
	count := limit
	if len(scored) < count {
		count = len(scored)
	}

	results := make([]*SearchResult, 0, count)
	for _, hit := range scored[:count] {
		doc := s.docs[hit.id]
		results = append(results, &SearchResult{
			ID:       doc.ID,
			Content:  doc.Content,
			Score:    hit.score,
			Metadata: doc.Metadata,
		})
	}

	return results, nil
}
// Delete removes the document stored under id, if any, and rebuilds the
// vectors slice from the remaining documents. Deleting an unknown id is not
// an error.
func (s *MemoryStore) Delete(ctx context.Context, id string) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	delete(s.docs, id)

	// Rebuild vectors slice
	var remaining [][]float32
	for _, kept := range s.docs {
		remaining = append(remaining, kept.Vector)
	}
	s.vectors = remaining

	return nil
}
// Get returns the document stored under id, or a "document not found" error
// when there is none. The returned pointer is the stored document itself,
// not a copy.
func (s *MemoryStore) Get(ctx context.Context, id string) (*Document, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	if doc, found := s.docs[id]; found {
		return doc, nil
	}
	return nil, fmt.Errorf("document not found: %s", id)
}
// Count reports how many documents are currently stored. The error is
// always nil.
func (s *MemoryStore) Count(ctx context.Context) (int, error) {
	s.mu.RLock()
	total := len(s.docs)
	s.mu.RUnlock()
	return total, nil
}
// Clear drops every stored document by replacing the document map with a
// fresh one and resetting the vectors slice. The error is always nil.
func (s *MemoryStore) Clear(ctx context.Context) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	s.docs, s.vectors = make(map[string]*Document), nil
	return nil
}
// persist prepares on-disk storage for the documents. It currently only
// creates config.PersistDir (mode 0755) and computes the data file path;
// nothing is written yet. It does nothing when PersistDir is empty and
// returns the error from directory creation otherwise.
func (s *MemoryStore) persist() error {
	dir := s.config.PersistDir
	if dir == "" {
		return nil
	}

	// Create directory if needed
	if err := os.MkdirAll(dir, 0755); err != nil {
		return err
	}

	// TODO: Implement actual JSON serialization
	_ = filepath.Join(dir, "documents.json") // Placeholder

	return nil
}
// ChromemStore wraps chromem-go for production use.
// It is currently a stub: every Store method returns a
// "chromem store not implemented" error.
type ChromemStore struct {
// config is the configuration passed to NewChromemStore.
config *Config
// TODO: Add chromem-go client
}
// NewChromemStore creates a chromem-backed store.
// The returned store only keeps a reference to config.
func NewChromemStore(config *Config) *ChromemStore {
return &ChromemStore{config: config}
}
// Add is not implemented and always returns an error.
func (s *ChromemStore) Add(ctx context.Context, docs []*Document) error {
// TODO: Implement with chromem-go
return fmt.Errorf("chromem store not implemented")
}
// Search is not implemented and always returns nil results and an error.
func (s *ChromemStore) Search(ctx context.Context, vector []float32, limit int) ([]*SearchResult, error) {
return nil, fmt.Errorf("chromem store not implemented")
}
// Delete is not implemented and always returns an error.
func (s *ChromemStore) Delete(ctx context.Context, id string) error {
return fmt.Errorf("chromem store not implemented")
}
// Get is not implemented and always returns a nil document and an error.
func (s *ChromemStore) Get(ctx context.Context, id string) (*Document, error) {
return nil, fmt.Errorf("chromem store not implemented")
}
// Count is not implemented and always returns 0 and an error.
func (s *ChromemStore) Count(ctx context.Context) (int, error) {
return 0, fmt.Errorf("chromem store not implemented")
}
// Clear is not implemented and always returns an error.
func (s *ChromemStore) Clear(ctx context.Context) error {
return fmt.Errorf("chromem store not implemented")
}
// Helper functions

// generateID derives a document ID from content: the first 12 bytes of its
// SHA-256 digest, hex-encoded (24 characters). Equal content always yields
// the same ID.
func generateID(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash.Write never returns an error
	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest[:12])
}
// cosineSimilarity returns the cosine of the angle between a and b, computed
// in float64. It returns 0 when the vectors differ in length or when either
// has zero magnitude (which includes two empty vectors).
func cosineSimilarity(a, b []float32) float64 {
	if len(a) != len(b) {
		return 0
	}

	var dot, sumSqA, sumSqB float64
	for i, component := range a {
		x, y := float64(component), float64(b[i])
		dot += x * y
		sumSqA += x * x
		sumSqB += y * y
	}

	// Only divide when both magnitudes are non-zero.
	if sumSqA != 0 && sumSqB != 0 {
		return dot / (sqrt(sumSqA) * sqrt(sumSqB))
	}
	return 0
}
// sqrt returns the square root of x using Newton's method, iterated until
// the estimate stops decreasing rather than for a fixed number of steps.
// (Ten steps starting from z = x were far from converged for large or very
// small inputs.)
//
// Zero and negative inputs return 0; NaN and +Inf are returned unchanged.
func sqrt(x float64) float64 {
	if x <= 0 {
		return 0
	}
	// x*0 is NaN exactly when x is NaN or infinite; neither can be iterated.
	if x*0 != 0 {
		return x
	}

	// Start at or above the true root so every Newton step moves the
	// estimate downwards; the first step that fails to decrease it means
	// floating-point precision has been reached.
	z := x
	if z < 1 {
		z = 1
	}
	for {
		next := 0.5 * (z + x/z)
		if next >= z {
			return z
		}
		z = next
	}
}
// sortByScore orders docs in place by descending score using insertion
// sort. Elements with equal scores keep their relative order.
func sortByScore(docs []*scoredDoc) {
	for i := 1; i < len(docs); i++ {
		current := docs[i]
		pos := i
		// Shift lower-scored predecessors one slot right to open the gap.
		for ; pos > 0 && current.score > docs[pos-1].score; pos-- {
			docs[pos] = docs[pos-1]
		}
		docs[pos] = current
	}
}