mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-04 04:23:02 +00:00
update
This commit is contained in:
@@ -0,0 +1,368 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// Config is the typed application configuration loaded from devour.yaml.
|
||||
type Config struct {
|
||||
Version int `yaml:"version"`
|
||||
Storage StorageConfig `yaml:"storage"`
|
||||
Embeddings EmbeddingsConfig `yaml:"embeddings"`
|
||||
VectorDB VectorDBConfig `yaml:"vector_db"`
|
||||
Scraper ScraperConfig `yaml:"scraper"`
|
||||
Scheduler SchedulerConfig `yaml:"scheduler"`
|
||||
Server ServerConfig `yaml:"server"`
|
||||
Indexing IndexingConfig `yaml:"indexing"`
|
||||
Verification VerificationConfig `yaml:"verification"`
|
||||
Sources []SourceConfig `yaml:"sources"`
|
||||
|
||||
ConfigPath string `yaml:"-"`
|
||||
}
|
||||
|
||||
// StorageConfig lists the on-disk directories configured under the
// "storage" key. Default points all four below ./devour_data, and
// EnsureStorageDirs creates each non-blank one.
type StorageConfig struct {
	DocsDir     string `yaml:"docs_dir"`
	IndexDir    string `yaml:"index_dir"`
	MetadataDir string `yaml:"metadata_dir"`
	CacheDir    string `yaml:"cache_dir"`
}
|
||||
|
||||
// EmbeddingsConfig holds the settings read from the "embeddings" key.
//
// Default selects the "openai" provider with model "text-embedding-3-small",
// 1536 dimensions and a batch size of 100, and stores the literal
// placeholder "${OPENAI_API_KEY}" in APIKey. Nothing in this file expands
// that placeholder.
// NOTE(review): presumably the consumer of APIKey resolves the environment
// variable — confirm against the embeddings client.
type EmbeddingsConfig struct {
	Provider   string `yaml:"provider"`
	Model      string `yaml:"model"`
	Dimensions int    `yaml:"dimensions"`
	APIKey     string `yaml:"api_key"`
	BatchSize  int    `yaml:"batch_size"`
	BaseURL    string `yaml:"base_url"`
}
|
||||
|
||||
// VectorDBConfig holds the settings read from the "vector_db" key.
//
// Default uses Type "memory" with Persist enabled and the "cosine"
// similarity metric; PersistDir has no default and stays empty unless the
// file sets it.
type VectorDBConfig struct {
	Type             string `yaml:"type"`
	Persist          bool   `yaml:"persist"`
	SimilarityMetric string `yaml:"similarity_metric"`
	PersistDir       string `yaml:"persist_dir"`
}
|
||||
|
||||
// ScraperConfig holds the settings read from the "scraper" key.
//
// Timeout, RetryDelay and RateLimit are time.Duration values. ApplyDefaults
// replaces non-positive Timeout, RetryCount, RetryDelay, Concurrency and
// MaxDepth with their defaults, only clamps a negative RateLimit to zero,
// and falls back to Storage.CacheDir when CacheDir is empty.
type ScraperConfig struct {
	UserAgent   string        `yaml:"user_agent"`
	Timeout     time.Duration `yaml:"timeout"`
	RetryCount  int           `yaml:"retry_count"`
	RetryDelay  time.Duration `yaml:"retry_delay"`
	Concurrency int           `yaml:"concurrency"`
	RateLimit   time.Duration `yaml:"rate_limit"`
	MaxDepth    int           `yaml:"max_depth"`
	CacheDir    string        `yaml:"cache_dir"`
}
|
||||
|
||||
// SchedulerConfig holds the settings read from the "scheduler" key.
//
// Default enables the scheduler with a 72-hour Interval, the "hash" check
// method, and OnStartup disabled. ApplyDefaults restores Interval and
// CheckMethod when they are unset but never touches the two booleans.
type SchedulerConfig struct {
	Enabled     bool          `yaml:"enabled"`
	Interval    time.Duration `yaml:"interval"`
	CheckMethod string        `yaml:"check_method"`
	OnStartup   bool          `yaml:"on_startup"`
}
|
||||
|
||||
// ServerConfig holds the settings read from the "server" key.
//
// Default uses Mode "local", Transport "stdio", Host "localhost" and Port
// 8080; RenderInitYAML switches Mode to "remote" when asked to.
type ServerConfig struct {
	Mode      string `yaml:"mode"`
	Transport string `yaml:"transport"`
	Host      string `yaml:"host"`
	Port      int    `yaml:"port"`
}
|
||||
|
||||
// IndexingConfig holds the settings read from the "indexing" key.
//
// Default enables indexing and auto-reindexing with a SnippetLength of 220
// and a MaxDocs of 10000. ApplyDefaults restores the two integers when they
// are non-positive and leaves both booleans as loaded.
type IndexingConfig struct {
	Enabled       bool `yaml:"enabled"`
	AutoReindex   bool `yaml:"auto_reindex"`
	SnippetLength int  `yaml:"snippet_length"`
	MaxDocs       int  `yaml:"max_docs"`
}
|
||||
|
||||
// VerificationConfig holds the settings read from the "verification" key.
// Default enables it with a 90-second Timeout, which ApplyDefaults also
// restores whenever the loaded value is non-positive.
type VerificationConfig struct {
	Enabled bool          `yaml:"enabled"`
	Timeout time.Duration `yaml:"timeout"`
}
|
||||
|
||||
// SourceConfig describes one entry of the "sources" list.
//
// Only Name and Type are always written when marshalling; every other field
// carries omitempty and is dropped from the YAML when it holds its zero
// value. The init template shows "url" and "localsearch" as example Type
// values.
// NOTE(review): which fields each Type actually consumes (for example Repo,
// Branch and Path) is not visible in this file — confirm against the source
// loaders.
type SourceConfig struct {
	Name        string   `yaml:"name"`
	Type        string   `yaml:"type"`
	URL         string   `yaml:"url,omitempty"`
	Query       string   `yaml:"query,omitempty"`
	ResultLimit int      `yaml:"result_limit,omitempty"`
	Domains     []string `yaml:"domains,omitempty"`
	Repo        string   `yaml:"repo,omitempty"`
	Branch      string   `yaml:"branch,omitempty"`
	Path        string   `yaml:"path,omitempty"`
	Include     []string `yaml:"include,omitempty"`
	Exclude     []string `yaml:"exclude,omitempty"`
	Schedule    string   `yaml:"schedule,omitempty"`
}
|
||||
|
||||
// Default returns a default configuration that matches devour init behavior.
|
||||
func Default() *Config {
|
||||
return &Config{
|
||||
Version: 1,
|
||||
Storage: StorageConfig{
|
||||
DocsDir: "./devour_data/docs",
|
||||
IndexDir: "./devour_data/index",
|
||||
MetadataDir: "./devour_data/metadata",
|
||||
CacheDir: "./devour_data/cache",
|
||||
},
|
||||
Embeddings: EmbeddingsConfig{
|
||||
Provider: "openai",
|
||||
Model: "text-embedding-3-small",
|
||||
Dimensions: 1536,
|
||||
BatchSize: 100,
|
||||
APIKey: "${OPENAI_API_KEY}",
|
||||
},
|
||||
VectorDB: VectorDBConfig{
|
||||
Type: "memory",
|
||||
Persist: true,
|
||||
SimilarityMetric: "cosine",
|
||||
},
|
||||
Scraper: ScraperConfig{
|
||||
UserAgent: "Devour/1.0",
|
||||
Timeout: 30 * time.Second,
|
||||
RetryCount: 3,
|
||||
RetryDelay: 1 * time.Second,
|
||||
Concurrency: 10,
|
||||
RateLimit: 500 * time.Millisecond,
|
||||
MaxDepth: 3,
|
||||
CacheDir: "./devour_data/cache",
|
||||
},
|
||||
Scheduler: SchedulerConfig{
|
||||
Enabled: true,
|
||||
Interval: 72 * time.Hour,
|
||||
CheckMethod: "hash",
|
||||
OnStartup: false,
|
||||
},
|
||||
Server: ServerConfig{
|
||||
Mode: "local",
|
||||
Transport: "stdio",
|
||||
Host: "localhost",
|
||||
Port: 8080,
|
||||
},
|
||||
Indexing: IndexingConfig{
|
||||
Enabled: true,
|
||||
AutoReindex: true,
|
||||
SnippetLength: 220,
|
||||
MaxDocs: 10000,
|
||||
},
|
||||
Verification: VerificationConfig{
|
||||
Enabled: true,
|
||||
Timeout: 90 * time.Second,
|
||||
},
|
||||
Sources: []SourceConfig{},
|
||||
}
|
||||
}
|
||||
|
||||
// initTemplateSourcesComment is the tail that RenderInitYAML appends to the
// generated file: an empty "sources" list followed by commented-out sample
// entries. The samples are indented so that stripping the leading "# " from
// each line yields well-formed nested list items.
const initTemplateSourcesComment = `
# Sources (add your own)
sources: []
#   - name: example-docs
#     type: url
#     url: https://docs.example.com
#     include: ["**/*.md", "**/*.html"]
#   - name: local-searxng
#     type: localsearch
#     url: http://127.0.0.1:8080/search
#     query: golang http client
#     result_limit: 8
#     domains: ["pkg.go.dev", "go.dev"]
`
|
||||
|
||||
// RenderInitYAML returns the default init config file content from canonical defaults.
|
||||
func RenderInitYAML(remote bool) (string, error) {
|
||||
cfg := Default()
|
||||
if remote {
|
||||
cfg.Server.Mode = "remote"
|
||||
}
|
||||
// Keep the init template comments for discoverability while sourcing
|
||||
// the actual values from canonical defaults.
|
||||
cfg.Sources = nil
|
||||
|
||||
body, err := yaml.Marshal(cfg)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("marshal default config: %w", err)
|
||||
}
|
||||
|
||||
trimmed := strings.TrimSuffix(string(body), "\n")
|
||||
if strings.HasSuffix(trimmed, "sources: []") {
|
||||
trimmed = strings.TrimSuffix(trimmed, "sources: []")
|
||||
trimmed = strings.TrimSpace(trimmed)
|
||||
}
|
||||
|
||||
return "# Devour Configuration\n" + trimmed + initTemplateSourcesComment, nil
|
||||
}
|
||||
|
||||
// Load loads configuration from an explicit path or the default search paths.
|
||||
func Load(explicitPath string) (*Config, error) {
|
||||
cfg := Default()
|
||||
|
||||
path, err := findConfigPath(explicitPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if path == "" {
|
||||
cfg.ApplyDefaults()
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
b, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read config: %w", err)
|
||||
}
|
||||
|
||||
if err := yaml.Unmarshal(b, cfg); err != nil {
|
||||
return nil, fmt.Errorf("parse config: %w", err)
|
||||
}
|
||||
|
||||
cfg.ConfigPath = path
|
||||
cfg.ApplyDefaults()
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// ApplyDefaults ensures additive backward-compatible defaults after unmarshaling.
|
||||
func (c *Config) ApplyDefaults() {
|
||||
if c.Version == 0 {
|
||||
c.Version = 1
|
||||
}
|
||||
|
||||
if c.Storage.DocsDir == "" {
|
||||
c.Storage.DocsDir = "./devour_data/docs"
|
||||
}
|
||||
if c.Storage.IndexDir == "" {
|
||||
c.Storage.IndexDir = "./devour_data/index"
|
||||
}
|
||||
if c.Storage.MetadataDir == "" {
|
||||
c.Storage.MetadataDir = "./devour_data/metadata"
|
||||
}
|
||||
if c.Storage.CacheDir == "" {
|
||||
c.Storage.CacheDir = "./devour_data/cache"
|
||||
}
|
||||
|
||||
if c.Embeddings.Provider == "" {
|
||||
c.Embeddings.Provider = "openai"
|
||||
}
|
||||
if c.Embeddings.Model == "" {
|
||||
c.Embeddings.Model = "text-embedding-3-small"
|
||||
}
|
||||
if c.Embeddings.Dimensions <= 0 {
|
||||
c.Embeddings.Dimensions = 1536
|
||||
}
|
||||
if c.Embeddings.BatchSize <= 0 {
|
||||
c.Embeddings.BatchSize = 100
|
||||
}
|
||||
|
||||
if c.VectorDB.Type == "" {
|
||||
c.VectorDB.Type = "memory"
|
||||
}
|
||||
if c.VectorDB.SimilarityMetric == "" {
|
||||
c.VectorDB.SimilarityMetric = "cosine"
|
||||
}
|
||||
|
||||
if c.Scraper.UserAgent == "" {
|
||||
c.Scraper.UserAgent = "Devour/1.0"
|
||||
}
|
||||
if c.Scraper.Timeout <= 0 {
|
||||
c.Scraper.Timeout = 30 * time.Second
|
||||
}
|
||||
if c.Scraper.RetryCount <= 0 {
|
||||
c.Scraper.RetryCount = 3
|
||||
}
|
||||
if c.Scraper.RetryDelay <= 0 {
|
||||
c.Scraper.RetryDelay = 1 * time.Second
|
||||
}
|
||||
if c.Scraper.Concurrency <= 0 {
|
||||
c.Scraper.Concurrency = 10
|
||||
}
|
||||
if c.Scraper.RateLimit < 0 {
|
||||
c.Scraper.RateLimit = 0
|
||||
}
|
||||
if c.Scraper.MaxDepth <= 0 {
|
||||
c.Scraper.MaxDepth = 3
|
||||
}
|
||||
if c.Scraper.CacheDir == "" {
|
||||
c.Scraper.CacheDir = c.Storage.CacheDir
|
||||
}
|
||||
|
||||
if c.Scheduler.Interval <= 0 {
|
||||
c.Scheduler.Interval = 72 * time.Hour
|
||||
}
|
||||
if c.Scheduler.CheckMethod == "" {
|
||||
c.Scheduler.CheckMethod = "hash"
|
||||
}
|
||||
|
||||
if c.Server.Mode == "" {
|
||||
c.Server.Mode = "local"
|
||||
}
|
||||
if c.Server.Transport == "" {
|
||||
c.Server.Transport = "stdio"
|
||||
}
|
||||
if c.Server.Host == "" {
|
||||
c.Server.Host = "localhost"
|
||||
}
|
||||
if c.Server.Port <= 0 {
|
||||
c.Server.Port = 8080
|
||||
}
|
||||
|
||||
if !c.Indexing.Enabled {
|
||||
// keep explicit false but initialize defaults for remaining fields
|
||||
}
|
||||
if c.Indexing.SnippetLength <= 0 {
|
||||
c.Indexing.SnippetLength = 220
|
||||
}
|
||||
if c.Indexing.MaxDocs <= 0 {
|
||||
c.Indexing.MaxDocs = 10000
|
||||
}
|
||||
|
||||
if c.Verification.Timeout <= 0 {
|
||||
c.Verification.Timeout = 90 * time.Second
|
||||
}
|
||||
}
|
||||
|
||||
// findConfigPath resolves which configuration file should be read.
//
// A non-blank explicitPath is made absolute and must be stat-able; if it is
// not, an error wrapping the underlying os.Stat failure is returned, so
// callers can test it with errors.Is(err, fs.ErrNotExist). Otherwise
// ./devour.yaml and then <home>/.devour/devour.yaml are probed in that order
// and the first one that exists is returned as an absolute path. The home
// candidate is skipped when the home directory cannot be determined. When
// nothing is found the result is ("", nil).
func findConfigPath(explicitPath string) (string, error) {
	if strings.TrimSpace(explicitPath) != "" {
		p, err := filepath.Abs(explicitPath)
		if err != nil {
			return "", fmt.Errorf("resolve config path %s: %w", explicitPath, err)
		}
		if _, err := os.Stat(p); err != nil {
			// Keep the established message prefix but preserve the cause,
			// which may be something other than "does not exist".
			return "", fmt.Errorf("config file not found: %s: %w", explicitPath, err)
		}
		return p, nil
	}

	candidates := []string{"./devour.yaml"}
	if home, err := os.UserHomeDir(); err == nil {
		candidates = append(candidates, filepath.Join(home, ".devour", "devour.yaml"))
	}

	for _, c := range candidates {
		// Best effort: any stat failure just means "try the next one".
		if _, err := os.Stat(c); err != nil {
			continue
		}
		p, err := filepath.Abs(c)
		if err != nil {
			return "", fmt.Errorf("resolve config path %s: %w", c, err)
		}
		return p, nil
	}
	return "", nil
}
|
||||
|
||||
// EnsureStorageDirs creates required local storage directories.
|
||||
func (c *Config) EnsureStorageDirs() error {
|
||||
dirs := []string{c.Storage.DocsDir, c.Storage.IndexDir, c.Storage.MetadataDir, c.Storage.CacheDir}
|
||||
for _, dir := range dirs {
|
||||
if strings.TrimSpace(dir) == "" {
|
||||
continue
|
||||
}
|
||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user