// Package config defines the typed devour.yaml configuration, its canonical
// defaults, and helpers for rendering and loading it.
package config

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"time"

	"gopkg.in/yaml.v3"
)

// Config is the typed application configuration loaded from devour.yaml.
//
// Field order is significant: RenderInitYAML relies on Sources being the last
// serialized field so that it can replace the trailing "sources: []" entry
// with the commented template.
type Config struct {
	Version      int                `yaml:"version"`
	Storage      StorageConfig      `yaml:"storage"`
	Embeddings   EmbeddingsConfig   `yaml:"embeddings"`
	VectorDB     VectorDBConfig     `yaml:"vector_db"`
	Scraper      ScraperConfig      `yaml:"scraper"`
	Scheduler    SchedulerConfig    `yaml:"scheduler"`
	Server       ServerConfig       `yaml:"server"`
	Indexing     IndexingConfig     `yaml:"indexing"`
	Verification VerificationConfig `yaml:"verification"`
	Sources      []SourceConfig     `yaml:"sources"`

	// ConfigPath is the absolute path of the file the configuration was
	// loaded from. It is empty when no file was found and is never
	// serialized.
	ConfigPath string `yaml:"-"`
}

// StorageConfig holds the "storage" section: the local directories that
// EnsureStorageDirs creates.
type StorageConfig struct {
	DocsDir     string `yaml:"docs_dir"`
	IndexDir    string `yaml:"index_dir"`
	MetadataDir string `yaml:"metadata_dir"`
	CacheDir    string `yaml:"cache_dir"`
}

// EmbeddingsConfig holds the "embeddings" section.
type EmbeddingsConfig struct {
	Provider   string `yaml:"provider"`
	Model      string `yaml:"model"`
	Dimensions int    `yaml:"dimensions"`
	APIKey     string `yaml:"api_key"`
	BatchSize  int    `yaml:"batch_size"`
	BaseURL    string `yaml:"base_url"`
}

// VectorDBConfig holds the "vector_db" section.
type VectorDBConfig struct {
	Type             string `yaml:"type"`
	Persist          bool   `yaml:"persist"`
	SimilarityMetric string `yaml:"similarity_metric"`
	PersistDir       string `yaml:"persist_dir"`
}

// ScraperConfig holds the "scraper" section.
type ScraperConfig struct {
	UserAgent   string        `yaml:"user_agent"`
	Timeout     time.Duration `yaml:"timeout"`
	RetryCount  int           `yaml:"retry_count"`
	RetryDelay  time.Duration `yaml:"retry_delay"`
	Concurrency int           `yaml:"concurrency"`
	RateLimit   time.Duration `yaml:"rate_limit"`
	MaxDepth    int           `yaml:"max_depth"`
	CacheDir    string        `yaml:"cache_dir"`
}

// SchedulerConfig holds the "scheduler" section.
type SchedulerConfig struct {
	Enabled     bool          `yaml:"enabled"`
	Interval    time.Duration `yaml:"interval"`
	CheckMethod string        `yaml:"check_method"`
	OnStartup   bool          `yaml:"on_startup"`
}

// ServerConfig holds the "server" section.
type ServerConfig struct {
	Mode      string `yaml:"mode"`
	Transport string `yaml:"transport"`
	Host      string `yaml:"host"`
	Port      int    `yaml:"port"`
}

// IndexingConfig holds the "indexing" section.
type IndexingConfig struct {
	Enabled       bool `yaml:"enabled"`
	AutoReindex   bool `yaml:"auto_reindex"`
	SnippetLength int  `yaml:"snippet_length"`
	MaxDocs       int  `yaml:"max_docs"`
}

// VerificationConfig holds the "verification" section.
type VerificationConfig struct {
	Enabled bool          `yaml:"enabled"`
	Timeout time.Duration `yaml:"timeout"`
}

// SourceConfig describes one entry of the "sources" list. Apart from Name and
// Type every field is optional and omitted from the YAML output when empty.
type SourceConfig struct {
	Name        string   `yaml:"name"`
	Type        string   `yaml:"type"`
	URL         string   `yaml:"url,omitempty"`
	Query       string   `yaml:"query,omitempty"`
	ResultLimit int      `yaml:"result_limit,omitempty"`
	Domains     []string `yaml:"domains,omitempty"`
	Repo        string   `yaml:"repo,omitempty"`
	Branch      string   `yaml:"branch,omitempty"`
	Path        string   `yaml:"path,omitempty"`
	Include     []string `yaml:"include,omitempty"`
	Exclude     []string `yaml:"exclude,omitempty"`
	Schedule    string   `yaml:"schedule,omitempty"`
}

// Default returns a default configuration that matches devour init behavior.
// Every call returns a freshly allocated value that the caller may modify.
func Default() *Config {
	return &Config{
		Version: 1,
		Storage: StorageConfig{
			DocsDir:     "./devour_data/docs",
			IndexDir:    "./devour_data/index",
			MetadataDir: "./devour_data/metadata",
			CacheDir:    "./devour_data/cache",
		},
		Embeddings: EmbeddingsConfig{
			Provider:   "openai",
			Model:      "text-embedding-3-small",
			Dimensions: 1536,
			BatchSize:  100,
			APIKey:     "${OPENAI_API_KEY}",
		},
		VectorDB: VectorDBConfig{
			Type:             "memory",
			Persist:          true,
			SimilarityMetric: "cosine",
		},
		Scraper: ScraperConfig{
			UserAgent:   "Devour/1.0",
			Timeout:     30 * time.Second,
			RetryCount:  3,
			RetryDelay:  1 * time.Second,
			Concurrency: 10,
			RateLimit:   500 * time.Millisecond,
			MaxDepth:    3,
			CacheDir:    "./devour_data/cache",
		},
		Scheduler: SchedulerConfig{
			Enabled:     true,
			Interval:    72 * time.Hour,
			CheckMethod: "hash",
			OnStartup:   false,
		},
		Server: ServerConfig{
			Mode:      "local",
			Transport: "stdio",
			Host:      "localhost",
			Port:      8080,
		},
		Indexing: IndexingConfig{
			Enabled:       true,
			AutoReindex:   true,
			SnippetLength: 220,
			MaxDocs:       10000,
		},
		Verification: VerificationConfig{
			Enabled: true,
			Timeout: 90 * time.Second,
		},
		Sources: []SourceConfig{},
	}
}

// initTemplateSourcesComment is appended to the rendered init file in place of
// the marshaled "sources: []" entry. It starts with a blank line because the
// marshaled body it follows has had its trailing newline removed.
const initTemplateSourcesComment = `

# Sources (add your own)
sources: []
# - name: example-docs
#   type: url
#   url: https://docs.example.com
#   include: ["**/*.md", "**/*.html"]
# - name: local-searxng
#   type: localsearch
#   url: http://127.0.0.1:8080/search
#   query: golang http client
#   result_limit: 8
#   domains: ["pkg.go.dev", "go.dev"]
`

// RenderInitYAML returns the default init config file content from canonical
// defaults. When remote is true the server mode is written as "remote"
// instead of the default. An error is returned only if the defaults cannot be
// marshaled to YAML.
func RenderInitYAML(remote bool) (string, error) {
	cfg := Default()
	if remote {
		cfg.Server.Mode = "remote"
	}

	// Keep the init template comments for discoverability while sourcing
	// the actual values from canonical defaults.
	cfg.Sources = nil
	body, err := yaml.Marshal(cfg)
	if err != nil {
		return "", fmt.Errorf("marshal default config: %w", err)
	}

	// Drop the marshaled empty sources entry; the template appended below
	// supplies its own "sources: []" together with commented examples.
	trimmed := strings.TrimSuffix(string(body), "\n")
	if strings.HasSuffix(trimmed, "sources: []") {
		trimmed = strings.TrimSuffix(trimmed, "sources: []")
		trimmed = strings.TrimSpace(trimmed)
	}

	return "# Devour Configuration\n" + trimmed + initTemplateSourcesComment, nil
}

// Load loads configuration from an explicit path or the default search paths.
//
// The file is decoded on top of Default(), so keys it omits keep their default
// values. When explicitPath is blank and no file exists in the search paths,
// the defaults are returned with an empty ConfigPath. Errors from locating,
// reading, or parsing the file are returned, and the returned *Config is nil.
func Load(explicitPath string) (*Config, error) {
	cfg := Default()

	path, err := findConfigPath(explicitPath)
	if err != nil {
		return nil, err
	}
	if path == "" {
		cfg.ApplyDefaults()
		return cfg, nil
	}

	b, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("read config: %w", err)
	}
	// NOTE(review): because decoding starts from Default(), Scraper.CacheDir is
	// already populated, so the ApplyDefaults fallback to Storage.CacheDir does
	// not trigger for a file that sets only storage.cache_dir. Confirm this is
	// intended.
	if err := yaml.Unmarshal(b, cfg); err != nil {
		// The path may have been discovered rather than supplied, and yaml
		// errors only carry a line number, so name the offending file.
		return nil, fmt.Errorf("parse config %s: %w", path, err)
	}

	cfg.ConfigPath = path
	cfg.ApplyDefaults()
	return cfg, nil
}

// ApplyDefaults ensures additive backward-compatible defaults after unmarshaling.
func (c *Config) ApplyDefaults() { if c.Version == 0 { c.Version = 1 } if c.Storage.DocsDir == "" { c.Storage.DocsDir = "./devour_data/docs" } if c.Storage.IndexDir == "" { c.Storage.IndexDir = "./devour_data/index" } if c.Storage.MetadataDir == "" { c.Storage.MetadataDir = "./devour_data/metadata" } if c.Storage.CacheDir == "" { c.Storage.CacheDir = "./devour_data/cache" } if c.Embeddings.Provider == "" { c.Embeddings.Provider = "openai" } if c.Embeddings.Model == "" { c.Embeddings.Model = "text-embedding-3-small" } if c.Embeddings.Dimensions <= 0 { c.Embeddings.Dimensions = 1536 } if c.Embeddings.BatchSize <= 0 { c.Embeddings.BatchSize = 100 } if c.VectorDB.Type == "" { c.VectorDB.Type = "memory" } if c.VectorDB.SimilarityMetric == "" { c.VectorDB.SimilarityMetric = "cosine" } if c.Scraper.UserAgent == "" { c.Scraper.UserAgent = "Devour/1.0" } if c.Scraper.Timeout <= 0 { c.Scraper.Timeout = 30 * time.Second } if c.Scraper.RetryCount <= 0 { c.Scraper.RetryCount = 3 } if c.Scraper.RetryDelay <= 0 { c.Scraper.RetryDelay = 1 * time.Second } if c.Scraper.Concurrency <= 0 { c.Scraper.Concurrency = 10 } if c.Scraper.RateLimit < 0 { c.Scraper.RateLimit = 0 } if c.Scraper.MaxDepth <= 0 { c.Scraper.MaxDepth = 3 } if c.Scraper.CacheDir == "" { c.Scraper.CacheDir = c.Storage.CacheDir } if c.Scheduler.Interval <= 0 { c.Scheduler.Interval = 72 * time.Hour } if c.Scheduler.CheckMethod == "" { c.Scheduler.CheckMethod = "hash" } if c.Server.Mode == "" { c.Server.Mode = "local" } if c.Server.Transport == "" { c.Server.Transport = "stdio" } if c.Server.Host == "" { c.Server.Host = "localhost" } if c.Server.Port <= 0 { c.Server.Port = 8080 } if !c.Indexing.Enabled { // keep explicit false but initialize defaults for remaining fields } if c.Indexing.SnippetLength <= 0 { c.Indexing.SnippetLength = 220 } if c.Indexing.MaxDocs <= 0 { c.Indexing.MaxDocs = 10000 } if c.Verification.Timeout <= 0 { c.Verification.Timeout = 90 * time.Second } } func findConfigPath(explicitPath string) 
(string, error) { if strings.TrimSpace(explicitPath) != "" { p, err := filepath.Abs(explicitPath) if err != nil { return "", err } if _, err := os.Stat(p); err != nil { return "", fmt.Errorf("config file not found: %s", explicitPath) } return p, nil } candidates := []string{"./devour.yaml"} if home, err := os.UserHomeDir(); err == nil { candidates = append(candidates, filepath.Join(home, ".devour", "devour.yaml")) } for _, c := range candidates { if _, err := os.Stat(c); err == nil { p, absErr := filepath.Abs(c) if absErr != nil { return "", absErr } return p, nil } } return "", nil } // EnsureStorageDirs creates required local storage directories. func (c *Config) EnsureStorageDirs() error { dirs := []string{c.Storage.DocsDir, c.Storage.IndexDir, c.Storage.MetadataDir, c.Storage.CacheDir} for _, dir := range dirs { if strings.TrimSpace(dir) == "" { continue } if err := os.MkdirAll(dir, 0o755); err != nil { return err } } return nil }