package cmd

import (
	"fmt"
	"path/filepath"
	"strings"
	"time"

	appconfig "github.com/yourorg/devour/internal/config"
	"github.com/yourorg/devour/internal/scraper"
)

// loadAppConfig loads the application configuration from cfgFile and makes
// sure the storage directories it refers to exist.
//
// cfgFile is a package-level variable declared outside this file (presumably
// bound to a CLI flag; verify against its declaration).
//
// The error from appconfig.Load is returned as is; a failure to prepare the
// storage directories is wrapped with the "ensure storage dirs" prefix.
func loadAppConfig() (*appconfig.Config, error) {
	cfg, err := appconfig.Load(cfgFile)
	if err != nil {
		return nil, err
	}
	if err := cfg.EnsureStorageDirs(); err != nil {
		return nil, fmt.Errorf("ensure storage dirs: %w", err)
	}
	return cfg, nil
}

// toScraperConfig converts the scraper section of the application config
// into a scraper.Config.
//
// A positive concurrencyOverride replaces the configured concurrency; zero or
// a negative value leaves the configured value in place. After that, any
// non-positive Timeout, RetryCount, RetryDelay, Concurrency or MaxDepth is
// replaced by a built-in default (30s, 3, 1s, 10 and 2 respectively). Note
// that this means a RetryCount of 0 cannot be expressed: it becomes 3.
// UserAgent, RateLimit and CacheDir are copied through unchanged.
func toScraperConfig(c *appconfig.Config, concurrencyOverride int) *scraper.Config {
	sc := &scraper.Config{
		UserAgent:   c.Scraper.UserAgent,
		Timeout:     c.Scraper.Timeout,
		RetryCount:  c.Scraper.RetryCount,
		RetryDelay:  c.Scraper.RetryDelay,
		Concurrency: c.Scraper.Concurrency,
		RateLimit:   c.Scraper.RateLimit,
		MaxDepth:    c.Scraper.MaxDepth,
		CacheDir:    c.Scraper.CacheDir,
	}

	// The override is applied before defaulting so that a non-positive
	// override still falls through to the configured or default value.
	if concurrencyOverride > 0 {
		sc.Concurrency = concurrencyOverride
	}

	if sc.Timeout <= 0 {
		sc.Timeout = 30 * time.Second
	}
	if sc.RetryCount <= 0 {
		sc.RetryCount = 3
	}
	if sc.RetryDelay <= 0 {
		sc.RetryDelay = 1 * time.Second
	}
	if sc.Concurrency <= 0 {
		sc.Concurrency = 10
	}
	if sc.MaxDepth <= 0 {
		sc.MaxDepth = 2
	}
	return sc
}

// sourceFromConfig builds a scraper.Source from a configured source entry.
//
// Every string field is stripped of surrounding whitespace. The Domains,
// Include and Exclude slices are copied, so the returned Source does not
// share backing arrays with s; an empty input slice yields a nil slice.
func sourceFromConfig(s appconfig.SourceConfig) *scraper.Source {
	return &scraper.Source{
		Name:        strings.TrimSpace(s.Name),
		Type:        scraper.SourceType(strings.TrimSpace(s.Type)),
		URL:         strings.TrimSpace(s.URL),
		Query:       strings.TrimSpace(s.Query),
		ResultLimit: s.ResultLimit,
		Domains:     append([]string(nil), s.Domains...),
		Repo:        strings.TrimSpace(s.Repo),
		Branch:      strings.TrimSpace(s.Branch),
		Path:        strings.TrimSpace(s.Path),
		Include:     append([]string(nil), s.Include...),
		Exclude:     append([]string(nil), s.Exclude...),
		Schedule:    strings.TrimSpace(s.Schedule),
	}
}

// resolveOutputDir picks the directory that output is written to.
//
// Precedence: override, then c.Storage.DocsDir, then the relative path
// "devour_data/docs" (joined with the OS path separator). A value that is
// empty or consists only of whitespace is treated as unset. The chosen value
// is returned with surrounding whitespace removed, so the result always
// matches the value that passed the emptiness check.
func resolveOutputDir(c *appconfig.Config, override string) string {
	if dir := strings.TrimSpace(override); dir != "" {
		return dir
	}
	if dir := strings.TrimSpace(c.Storage.DocsDir); dir != "" {
		return dir
	}
	return filepath.Join("devour_data", "docs")
}