mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-03 20:13:03 +00:00
82 lines
2.0 KiB
Go
82 lines
2.0 KiB
Go
package cmd
|
|
|
|
import (
|
|
"fmt"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
appconfig "github.com/yourorg/devour/internal/config"
|
|
"github.com/yourorg/devour/internal/scraper"
|
|
)
|
|
|
|
func loadAppConfig() (*appconfig.Config, error) {
|
|
cfg, err := appconfig.Load(cfgFile)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if err := cfg.EnsureStorageDirs(); err != nil {
|
|
return nil, fmt.Errorf("ensure storage dirs: %w", err)
|
|
}
|
|
return cfg, nil
|
|
}
|
|
|
|
func toScraperConfig(c *appconfig.Config, concurrencyOverride int) *scraper.Config {
|
|
sc := &scraper.Config{
|
|
UserAgent: c.Scraper.UserAgent,
|
|
Timeout: c.Scraper.Timeout,
|
|
RetryCount: c.Scraper.RetryCount,
|
|
RetryDelay: c.Scraper.RetryDelay,
|
|
Concurrency: c.Scraper.Concurrency,
|
|
RateLimit: c.Scraper.RateLimit,
|
|
MaxDepth: c.Scraper.MaxDepth,
|
|
CacheDir: c.Scraper.CacheDir,
|
|
}
|
|
if concurrencyOverride > 0 {
|
|
sc.Concurrency = concurrencyOverride
|
|
}
|
|
if sc.Timeout <= 0 {
|
|
sc.Timeout = 30 * time.Second
|
|
}
|
|
if sc.RetryCount <= 0 {
|
|
sc.RetryCount = 3
|
|
}
|
|
if sc.RetryDelay <= 0 {
|
|
sc.RetryDelay = 1 * time.Second
|
|
}
|
|
if sc.Concurrency <= 0 {
|
|
sc.Concurrency = 10
|
|
}
|
|
if sc.MaxDepth <= 0 {
|
|
sc.MaxDepth = 2
|
|
}
|
|
return sc
|
|
}
|
|
|
|
func sourceFromConfig(s appconfig.SourceConfig) *scraper.Source {
|
|
return &scraper.Source{
|
|
Name: strings.TrimSpace(s.Name),
|
|
Type: scraper.SourceType(strings.TrimSpace(s.Type)),
|
|
URL: strings.TrimSpace(s.URL),
|
|
Query: strings.TrimSpace(s.Query),
|
|
ResultLimit: s.ResultLimit,
|
|
Domains: append([]string(nil), s.Domains...),
|
|
Repo: strings.TrimSpace(s.Repo),
|
|
Branch: strings.TrimSpace(s.Branch),
|
|
Path: strings.TrimSpace(s.Path),
|
|
Include: append([]string(nil), s.Include...),
|
|
Exclude: append([]string(nil), s.Exclude...),
|
|
Schedule: strings.TrimSpace(s.Schedule),
|
|
}
|
|
}
|
|
|
|
func resolveOutputDir(c *appconfig.Config, override string) string {
|
|
if strings.TrimSpace(override) != "" {
|
|
return override
|
|
}
|
|
if strings.TrimSpace(c.Storage.DocsDir) != "" {
|
|
return c.Storage.DocsDir
|
|
}
|
|
return filepath.Join("devour_data", "docs")
|
|
}
|