Files
Devour/internal/config/config.go
T
Tomas Dvorak 898a3c303f update
2026-02-24 10:33:59 +01:00

369 lines
9.1 KiB
Go

package config
import (
"fmt"
"os"
"path/filepath"
"strings"
"time"
"gopkg.in/yaml.v3"
)
// Config is the typed application configuration loaded from devour.yaml.
//
// Each field maps to the top-level YAML key named in its struct tag. Default
// returns the canonical starting values, and ApplyDefaults backfills fields
// that are still empty or non-positive after decoding.
type Config struct {
// Version is set to 1 by Default; ApplyDefaults replaces 0 with 1.
Version int `yaml:"version"`
Storage StorageConfig `yaml:"storage"`
Embeddings EmbeddingsConfig `yaml:"embeddings"`
VectorDB VectorDBConfig `yaml:"vector_db"`
Scraper ScraperConfig `yaml:"scraper"`
Scheduler SchedulerConfig `yaml:"scheduler"`
Server ServerConfig `yaml:"server"`
Indexing IndexingConfig `yaml:"indexing"`
Verification VerificationConfig `yaml:"verification"`
// Sources is an empty, non-nil slice in Default.
Sources []SourceConfig `yaml:"sources"`
// ConfigPath is never read from or written to YAML (tag "-"). Load sets it
// to the absolute path of the file it parsed and leaves it empty when no
// config file was found.
ConfigPath string `yaml:"-"`
}
// StorageConfig is the `storage` section of Config. Its four directories are
// the ones EnsureStorageDirs creates. ApplyDefaults replaces an empty value
// with the default noted on each field.
type StorageConfig struct {
// DocsDir defaults to "./devour_data/docs".
DocsDir string `yaml:"docs_dir"`
// IndexDir defaults to "./devour_data/index".
IndexDir string `yaml:"index_dir"`
// MetadataDir defaults to "./devour_data/metadata".
MetadataDir string `yaml:"metadata_dir"`
// CacheDir defaults to "./devour_data/cache". ApplyDefaults also copies it
// into Scraper.CacheDir when that field is empty.
CacheDir string `yaml:"cache_dir"`
}
// EmbeddingsConfig is the `embeddings` section of Config.
type EmbeddingsConfig struct {
// Provider defaults to "openai".
Provider string `yaml:"provider"`
// Model defaults to "text-embedding-3-small".
Model string `yaml:"model"`
// Dimensions defaults to 1536; ApplyDefaults resets any value <= 0.
Dimensions int `yaml:"dimensions"`
// APIKey is the literal placeholder "${OPENAI_API_KEY}" in Default and has
// no fallback in ApplyDefaults.
// NOTE(review): nothing in this file expands the placeholder — confirm
// where environment substitution happens.
APIKey string `yaml:"api_key"`
// BatchSize defaults to 100; ApplyDefaults resets any value <= 0.
BatchSize int `yaml:"batch_size"`
// BaseURL has no default and is left empty unless configured.
BaseURL string `yaml:"base_url"`
}
// VectorDBConfig is the `vector_db` section of Config.
type VectorDBConfig struct {
// Type defaults to "memory".
Type string `yaml:"type"`
// Persist is true in Default. ApplyDefaults does not touch it.
Persist bool `yaml:"persist"`
// SimilarityMetric defaults to "cosine".
SimilarityMetric string `yaml:"similarity_metric"`
// PersistDir has no default and is left empty unless configured.
PersistDir string `yaml:"persist_dir"`
}
// ScraperConfig is the `scraper` section of Config. Unless noted otherwise,
// ApplyDefaults restores the listed default when a field is empty or <= 0.
type ScraperConfig struct {
// UserAgent defaults to "Devour/1.0".
UserAgent string `yaml:"user_agent"`
// Timeout defaults to 30s.
Timeout time.Duration `yaml:"timeout"`
// RetryCount defaults to 3. Because values <= 0 are reset, a configured 0
// is replaced by 3.
RetryCount int `yaml:"retry_count"`
// RetryDelay defaults to 1s.
RetryDelay time.Duration `yaml:"retry_delay"`
// Concurrency defaults to 10.
Concurrency int `yaml:"concurrency"`
// RateLimit is 500ms in Default. ApplyDefaults only clamps negative values
// to 0; an explicit 0 is kept.
RateLimit time.Duration `yaml:"rate_limit"`
// MaxDepth defaults to 3.
MaxDepth int `yaml:"max_depth"`
// CacheDir is "./devour_data/cache" in Default; when empty, ApplyDefaults
// copies Storage.CacheDir into it.
CacheDir string `yaml:"cache_dir"`
}
// SchedulerConfig is the `scheduler` section of Config.
type SchedulerConfig struct {
// Enabled is true in Default. ApplyDefaults does not touch it.
Enabled bool `yaml:"enabled"`
// Interval defaults to 72h; ApplyDefaults resets any value <= 0.
Interval time.Duration `yaml:"interval"`
// CheckMethod defaults to "hash".
CheckMethod string `yaml:"check_method"`
// OnStartup is false in Default. ApplyDefaults does not touch it.
OnStartup bool `yaml:"on_startup"`
}
// ServerConfig is the `server` section of Config.
type ServerConfig struct {
// Mode defaults to "local"; RenderInitYAML(true) writes "remote" instead.
Mode string `yaml:"mode"`
// Transport defaults to "stdio".
Transport string `yaml:"transport"`
// Host defaults to "localhost".
Host string `yaml:"host"`
// Port defaults to 8080; ApplyDefaults resets any value <= 0.
Port int `yaml:"port"`
}
// IndexingConfig is the `indexing` section of Config.
type IndexingConfig struct {
// Enabled is true in Default. ApplyDefaults preserves an explicit false.
Enabled bool `yaml:"enabled"`
// AutoReindex is true in Default. ApplyDefaults does not touch it.
AutoReindex bool `yaml:"auto_reindex"`
// SnippetLength defaults to 220; ApplyDefaults resets any value <= 0.
// NOTE(review): unit (bytes vs. runes) is not visible here — confirm.
SnippetLength int `yaml:"snippet_length"`
// MaxDocs defaults to 10000; ApplyDefaults resets any value <= 0.
MaxDocs int `yaml:"max_docs"`
}
// VerificationConfig is the `verification` section of Config.
type VerificationConfig struct {
// Enabled is true in Default. ApplyDefaults does not touch it.
Enabled bool `yaml:"enabled"`
// Timeout defaults to 90s; ApplyDefaults resets any value <= 0.
Timeout time.Duration `yaml:"timeout"`
}
// SourceConfig is one entry of the `sources` list in Config.
//
// Only name and type are always serialized; every other key is omitted from
// the YAML output when empty. The init template in this file shows two
// example shapes: type "url" (url, include) and type "localsearch" (url,
// query, result_limit, domains).
// NOTE(review): which fields each source type actually consumes is decided
// by code outside this file — confirm before relying on the grouping below.
type SourceConfig struct {
Name string `yaml:"name"`
Type string `yaml:"type"`
URL string `yaml:"url,omitempty"`
// Query, ResultLimit and Domains appear together in the "localsearch"
// template example.
Query string `yaml:"query,omitempty"`
ResultLimit int `yaml:"result_limit,omitempty"`
Domains []string `yaml:"domains,omitempty"`
// Repo, Branch and Path are presumably for repository-backed sources —
// TODO confirm against the source loaders.
Repo string `yaml:"repo,omitempty"`
Branch string `yaml:"branch,omitempty"`
Path string `yaml:"path,omitempty"`
// Include and Exclude hold pattern lists; the template example uses globs
// such as "**/*.md".
Include []string `yaml:"include,omitempty"`
Exclude []string `yaml:"exclude,omitempty"`
// Schedule is a free-form string here; its format is not defined in this file.
Schedule string `yaml:"schedule,omitempty"`
}
// Default builds a fresh Config filled with the canonical built-in values,
// matching what devour init writes. Each call returns a new, independent
// value, so callers may mutate the result freely.
func Default() *Config {
	var cfg Config

	cfg.Version = 1

	cfg.Storage.DocsDir = "./devour_data/docs"
	cfg.Storage.IndexDir = "./devour_data/index"
	cfg.Storage.MetadataDir = "./devour_data/metadata"
	cfg.Storage.CacheDir = "./devour_data/cache"

	cfg.Embeddings.Provider = "openai"
	cfg.Embeddings.Model = "text-embedding-3-small"
	cfg.Embeddings.Dimensions = 1536
	cfg.Embeddings.BatchSize = 100
	// Stored as a literal placeholder; this function does not read the environment.
	cfg.Embeddings.APIKey = "${OPENAI_API_KEY}"

	cfg.VectorDB.Type = "memory"
	cfg.VectorDB.Persist = true
	cfg.VectorDB.SimilarityMetric = "cosine"

	cfg.Scraper.UserAgent = "Devour/1.0"
	cfg.Scraper.Timeout = 30 * time.Second
	cfg.Scraper.RetryCount = 3
	cfg.Scraper.RetryDelay = 1 * time.Second
	cfg.Scraper.Concurrency = 10
	cfg.Scraper.RateLimit = 500 * time.Millisecond
	cfg.Scraper.MaxDepth = 3
	cfg.Scraper.CacheDir = "./devour_data/cache"

	cfg.Scheduler.Enabled = true
	cfg.Scheduler.Interval = 72 * time.Hour
	cfg.Scheduler.CheckMethod = "hash"
	cfg.Scheduler.OnStartup = false

	cfg.Server.Mode = "local"
	cfg.Server.Transport = "stdio"
	cfg.Server.Host = "localhost"
	cfg.Server.Port = 8080

	cfg.Indexing.Enabled = true
	cfg.Indexing.AutoReindex = true
	cfg.Indexing.SnippetLength = 220
	cfg.Indexing.MaxDocs = 10000

	cfg.Verification.Enabled = true
	cfg.Verification.Timeout = 90 * time.Second

	// Non-nil empty slice, as opposed to the nil zero value.
	cfg.Sources = []SourceConfig{}

	return &cfg
}
// initTemplateSourcesComment is the trailing section that RenderInitYAML
// appends to the generated config: an empty `sources` list followed by
// commented-out example entries. It starts with a newline so it can be
// concatenated directly after the marshaled settings.
const initTemplateSourcesComment = `
# Sources (add your own)
sources: []
# - name: example-docs
# type: url
# url: https://docs.example.com
# include: ["**/*.md", "**/*.html"]
# - name: local-searxng
# type: localsearch
# url: http://127.0.0.1:8080/search
# query: golang http client
# result_limit: 8
# domains: ["pkg.go.dev", "go.dev"]
`
// RenderInitYAML produces the text of a freshly initialized config file.
//
// The settings are marshaled from Default so the file never drifts from the
// canonical values. When remote is true, server.mode is written as "remote".
// The marshaled empty `sources` entry is dropped and replaced by the
// hand-written template section, which carries commented-out examples.
// The only error returned is a YAML marshaling failure.
func RenderInitYAML(remote bool) (string, error) {
	const (
		header       = "# Devour Configuration\n"
		emptySources = "sources: []"
	)

	defaults := Default()
	if remote {
		defaults.Server.Mode = "remote"
	}
	// The template supplies its own `sources` key, so marshal without entries.
	defaults.Sources = nil

	raw, err := yaml.Marshal(defaults)
	if err != nil {
		return "", fmt.Errorf("marshal default config: %w", err)
	}

	settings := strings.TrimSuffix(string(raw), "\n")
	if strings.HasSuffix(settings, emptySources) {
		// Cut the marshaled key and any whitespace left around the remainder.
		settings = strings.TrimSpace(settings[:len(settings)-len(emptySources)])
	}

	return header + settings + initTemplateSourcesComment, nil
}
// Load builds the effective configuration.
//
// explicitPath, when non-blank, names the file to read. Otherwise the default
// search locations are probed by findConfigPath. If no file is found, the
// built-in defaults are returned with ConfigPath left empty. If a file is
// found, it is decoded on top of the defaults and ConfigPath is set to its
// absolute path. ApplyDefaults always runs before returning.
//
// An error is returned when the path cannot be resolved, the file cannot be
// read, or the YAML cannot be parsed.
//
// NOTE(review): decoding starts from Default(), so Scraper.CacheDir is already
// populated. The ApplyDefaults fallback to Storage.CacheDir therefore only
// fires when the file sets scraper.cache_dir to an empty string — confirm this
// is intended.
func Load(explicitPath string) (*Config, error) {
	resolved, err := findConfigPath(explicitPath)
	if err != nil {
		return nil, err
	}

	cfg := Default()
	if resolved != "" {
		raw, readErr := os.ReadFile(resolved)
		if readErr != nil {
			return nil, fmt.Errorf("read config: %w", readErr)
		}
		if parseErr := yaml.Unmarshal(raw, cfg); parseErr != nil {
			return nil, fmt.Errorf("parse config: %w", parseErr)
		}
		cfg.ConfigPath = resolved
	}

	cfg.ApplyDefaults()
	return cfg, nil
}
// ApplyDefaults ensures additive backward-compatible defaults after unmarshaling.
//
// For the fields handled below, an empty string or a zero/negative number or
// duration is replaced with the same value Default uses. Exceptions:
//   - Version is only replaced when it is exactly 0.
//   - Scraper.RateLimit is only clamped from negative to 0; an explicit 0 is kept.
//   - An empty Scraper.CacheDir inherits Storage.CacheDir (after that field
//     has itself been defaulted).
//
// Boolean fields (for example Indexing.Enabled) are never modified: once the
// YAML has been decoded, an explicit false cannot be told apart from an unset
// value, so an explicit false is preserved.
func (c *Config) ApplyDefaults() {
	if c.Version == 0 {
		c.Version = 1
	}

	// Storage.
	if c.Storage.DocsDir == "" {
		c.Storage.DocsDir = "./devour_data/docs"
	}
	if c.Storage.IndexDir == "" {
		c.Storage.IndexDir = "./devour_data/index"
	}
	if c.Storage.MetadataDir == "" {
		c.Storage.MetadataDir = "./devour_data/metadata"
	}
	if c.Storage.CacheDir == "" {
		c.Storage.CacheDir = "./devour_data/cache"
	}

	// Embeddings.
	if c.Embeddings.Provider == "" {
		c.Embeddings.Provider = "openai"
	}
	if c.Embeddings.Model == "" {
		c.Embeddings.Model = "text-embedding-3-small"
	}
	if c.Embeddings.Dimensions <= 0 {
		c.Embeddings.Dimensions = 1536
	}
	if c.Embeddings.BatchSize <= 0 {
		c.Embeddings.BatchSize = 100
	}

	// Vector DB.
	if c.VectorDB.Type == "" {
		c.VectorDB.Type = "memory"
	}
	if c.VectorDB.SimilarityMetric == "" {
		c.VectorDB.SimilarityMetric = "cosine"
	}

	// Scraper.
	if c.Scraper.UserAgent == "" {
		c.Scraper.UserAgent = "Devour/1.0"
	}
	if c.Scraper.Timeout <= 0 {
		c.Scraper.Timeout = 30 * time.Second
	}
	if c.Scraper.RetryCount <= 0 {
		c.Scraper.RetryCount = 3
	}
	if c.Scraper.RetryDelay <= 0 {
		c.Scraper.RetryDelay = 1 * time.Second
	}
	if c.Scraper.Concurrency <= 0 {
		c.Scraper.Concurrency = 10
	}
	if c.Scraper.RateLimit < 0 {
		// Zero is a legal value here, so only negatives are corrected.
		c.Scraper.RateLimit = 0
	}
	if c.Scraper.MaxDepth <= 0 {
		c.Scraper.MaxDepth = 3
	}
	if c.Scraper.CacheDir == "" {
		// Must run after Storage.CacheDir has been defaulted above.
		c.Scraper.CacheDir = c.Storage.CacheDir
	}

	// Scheduler.
	if c.Scheduler.Interval <= 0 {
		c.Scheduler.Interval = 72 * time.Hour
	}
	if c.Scheduler.CheckMethod == "" {
		c.Scheduler.CheckMethod = "hash"
	}

	// Server.
	if c.Server.Mode == "" {
		c.Server.Mode = "local"
	}
	if c.Server.Transport == "" {
		c.Server.Transport = "stdio"
	}
	if c.Server.Host == "" {
		c.Server.Host = "localhost"
	}
	if c.Server.Port <= 0 {
		c.Server.Port = 8080
	}

	// Indexing: Enabled is left as decoded; only the numeric fields are backfilled.
	if c.Indexing.SnippetLength <= 0 {
		c.Indexing.SnippetLength = 220
	}
	if c.Indexing.MaxDocs <= 0 {
		c.Indexing.MaxDocs = 10000
	}

	// Verification.
	if c.Verification.Timeout <= 0 {
		c.Verification.Timeout = 90 * time.Second
	}
}
// findConfigPath resolves which config file, if any, should be read.
//
// When explicitPath is non-blank it is made absolute and must be stat-able.
// A missing file yields a "config file not found" error. Any other stat
// failure (permission denied, a path component that is not a directory, ...)
// is returned wrapped, so the real cause is not mislabeled as "not found".
//
// When explicitPath is blank, ./devour.yaml and then
// <home>/.devour/devour.yaml are probed in that order. The first candidate
// that can be stat-ed is returned as an absolute path. The home candidate is
// skipped when the home directory cannot be determined.
//
// An empty path with a nil error means no config file exists.
func findConfigPath(explicitPath string) (string, error) {
	if strings.TrimSpace(explicitPath) != "" {
		p, err := filepath.Abs(explicitPath)
		if err != nil {
			return "", fmt.Errorf("resolve config path %q: %w", explicitPath, err)
		}
		if _, err := os.Stat(p); err != nil {
			if os.IsNotExist(err) {
				return "", fmt.Errorf("config file not found: %s", explicitPath)
			}
			// The wrapped *PathError already carries the operation and path.
			return "", fmt.Errorf("access config file: %w", err)
		}
		return p, nil
	}

	candidates := []string{"./devour.yaml"}
	if home, err := os.UserHomeDir(); err == nil {
		candidates = append(candidates, filepath.Join(home, ".devour", "devour.yaml"))
	}
	for _, c := range candidates {
		// Probing is best-effort: an unreadable candidate is simply skipped.
		if _, err := os.Stat(c); err != nil {
			continue
		}
		p, err := filepath.Abs(c)
		if err != nil {
			return "", fmt.Errorf("resolve config path %q: %w", c, err)
		}
		return p, nil
	}
	return "", nil
}
// EnsureStorageDirs makes sure the docs, index, metadata and cache directories
// from the storage section exist, creating missing parents as needed with
// mode 0755. Blank entries are skipped. The first creation failure is
// returned as-is and stops the loop.
func (c *Config) EnsureStorageDirs() error {
	storage := c.Storage
	for _, target := range [...]string{
		storage.DocsDir,
		storage.IndexDir,
		storage.MetadataDir,
		storage.CacheDir,
	} {
		if strings.TrimSpace(target) != "" {
			if mkErr := os.MkdirAll(target, 0o755); mkErr != nil {
				return mkErr
			}
		}
	}
	return nil
}