Files
Devour/cmd/sync.go
Tomas Dvorak 898a3c303f update
2026-02-24 10:33:59 +01:00

203 lines
4.8 KiB
Go

package cmd
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"strings"
"time"
"github.com/spf13/cobra"
"github.com/yourorg/devour/internal/projectstate"
"github.com/yourorg/devour/internal/scraper"
"github.com/yourorg/devour/internal/search"
"github.com/yourorg/devour/internal/storage"
)
// syncCmd is the cobra command behind `devour sync`. All of its work is
// delegated to runSync; its flags are registered in init.
var syncCmd = &cobra.Command{
	Use:   "sync",
	RunE:  runSync,
	Short: "Synchronize with configured sources",
	Long: `Fetch updates from all configured sources.
Checks each source for changes (using hash or timestamp comparison)
and updates the local docs + index accordingly.
Examples:
devour sync # Sync all sources
devour sync --source my-docs # Sync specific source
devour sync --rebuild # Full rebuild`,
}
// Option values for the sync command. They are bound to command-line flags in
// init and read by runSync.
var (
	// syncForce is set by --force/-f; runSync then scrapes without running
	// change detection.
	syncForce bool
	// syncRebuild is set by --rebuild; runSync then scrapes every selected
	// source and always rebuilds the search index.
	syncRebuild bool
	// syncSource is set by --source/-s; when non-empty, runSync only handles
	// the configured source with exactly this name.
	syncSource string
)
// init registers the sync command's flags and binds each one to its
// package-level option variable.
func init() {
	flags := syncCmd.Flags()

	flags.StringVarP(&syncSource, "source", "s", "", "sync specific source only")
	flags.BoolVar(&syncRebuild, "rebuild", false, "rebuild entire index")
	flags.BoolVarP(&syncForce, "force", "f", false, "force sync even if no changes detected")
}
// runSync implements the `devour sync` command.
//
// For every configured source (or only the one named by --source) it fills in
// a missing source type and name, asks the scraper whether the source changed
// relative to the hash stored in the persisted source state, and, when it
// changed or --force/--rebuild is set, scrapes it and hands the documents to
// storage.SaveDocuments (json format, cfg.Storage.DocsDir). The per-source
// state is then written back to cfg.Storage.MetadataDir and the search index
// is rebuilt if any source was updated or --rebuild was given.
//
// cmd supplies the context used for all scraper and index calls; a nil cmd or
// a nil command context falls back to context.Background(). args is unused.
//
// The returned error is non-nil when configuration or source state cannot be
// loaded or saved, when --source names no configured source, when the context
// is cancelled, when the index rebuild fails, or when at least one source
// failed to sync.
func runSync(cmd *cobra.Command, args []string) error {
	cfg, err := loadAppConfig()
	if err != nil {
		return err
	}

	if syncRebuild {
		fmt.Println("🔄 Rebuilding local index from configured sources...")
	} else {
		fmt.Println("🔄 Syncing configured sources...")
	}

	if len(cfg.Sources) == 0 {
		fmt.Println("No sources configured. Add sources in devour.yaml first.")
		return nil
	}

	state, err := projectstate.LoadSourceState(cfg.Storage.MetadataDir)
	if err != nil {
		return fmt.Errorf("load source state: %w", err)
	}

	// Prefer the command's context so the sync can be cancelled by whoever
	// executes the command; fall back to Background when none is available.
	ctx := context.Background()
	if cmd != nil {
		if cmdCtx := cmd.Context(); cmdCtx != nil {
			ctx = cmdCtx
		}
	}

	var (
		matched   int // sources that passed the --source filter
		updated   int
		skipped   int
		failed    int
		totalDocs int
	)

	for _, srcCfg := range cfg.Sources {
		if syncSource != "" && srcCfg.Name != syncSource {
			continue
		}
		matched++

		// Stop between sources once cancelled instead of letting every
		// remaining source fail with a context error.
		if ctx.Err() != nil {
			break
		}

		source := sourceFromConfig(srcCfg)
		if source.Type == "" {
			if source.URL != "" {
				source.Type = detectSourceType(source.URL)
			} else if source.Path != "" {
				source.Type = scraper.SourceTypeLocal
			}
		}
		if source.Name == "" {
			source.Name = extractName(source.URL)
		}
		applySourceProfile(source)

		fmt.Printf("\n• %s (%s)\n", source.Name, source.Type)

		s := scraper.NewScraper(source.Type, toScraperConfig(cfg, 0))
		if s == nil {
			failed++
			fmt.Printf(" ✗ unsupported source type: %s\n", source.Type)
			continue
		}

		// The state entry is keyed by source name, with a label fallback.
		key := source.Name
		if key == "" {
			key = chooseSourceLabel(source)
		}

		lastHash := ""
		if prev := state.Sources[key]; prev != nil {
			lastHash = prev.Hash
		}

		// recordFailure counts the source as failed and stores the error in
		// its state entry. The previous hash is kept so the next sync still
		// compares against the last successfully synced content.
		recordFailure := func(cause error) {
			failed++
			state.Sources[key] = &projectstate.SourceState{
				Name:      source.Name,
				Type:      string(source.Type),
				URL:       source.URL,
				Hash:      lastHash,
				LastSync:  time.Now(),
				DocCount:  0,
				LastError: cause.Error(),
			}
		}

		// --force and --rebuild bypass change detection entirely; in that case
		// (and when detection fails) newHash stays at the previous hash.
		needsUpdate := syncForce || syncRebuild
		newHash := lastHash
		if !needsUpdate {
			changed, hash, detectErr := s.DetectChanges(ctx, source, lastHash)
			if detectErr != nil {
				fmt.Printf(" ⚠ change detection failed (%v), scraping anyway\n", detectErr)
				needsUpdate = true
			} else {
				needsUpdate = changed
				newHash = hash
			}
		}
		if !needsUpdate {
			skipped++
			fmt.Println(" ✓ no changes")
			continue
		}

		docs, scrapeErr := s.Scrape(ctx, source)
		if scrapeErr != nil {
			if ctx.Err() != nil {
				// Interrupted rather than failed: leave this source's
				// previous state untouched and stop.
				break
			}
			fmt.Printf(" ✗ scrape failed: %v\n", scrapeErr)
			recordFailure(scrapeErr)
			continue
		}

		saved, saveErr := storage.SaveDocuments(docs, storage.SaveOptions{
			Format:      "json",
			OutputDir:   cfg.Storage.DocsDir,
			AllowEmpty:  false,
			PrintWriter: nil,
		})
		if saveErr != nil {
			fmt.Printf(" ✗ save failed: %v\n", saveErr)
			// Record the error like a scrape failure so the persisted state
			// does not keep reporting an empty LastError.
			recordFailure(saveErr)
			continue
		}

		// No hash from change detection and none stored before: derive one
		// from the scraped documents' ID, hash and URL.
		if newHash == "" {
			h := sha256.New()
			for _, d := range docs {
				if d == nil {
					continue
				}
				fmt.Fprintf(h, "%s|%s|%s\n", d.ID, d.Hash, d.URL)
			}
			newHash = hex.EncodeToString(h.Sum(nil))
		}

		state.Sources[key] = &projectstate.SourceState{
			Name:      source.Name,
			Type:      string(source.Type),
			URL:       source.URL,
			Hash:      newHash,
			LastSync:  time.Now(),
			DocCount:  saved.Count,
			LastError: "",
		}
		updated++
		totalDocs += saved.Count
		fmt.Printf(" ✓ updated (%d docs)\n", saved.Count)
	}

	// Reject an unknown --source before touching the state file or the index.
	if syncSource != "" && matched == 0 {
		return fmt.Errorf("source %q not found in config", syncSource)
	}

	if err := projectstate.SaveSourceState(cfg.Storage.MetadataDir, state); err != nil {
		return fmt.Errorf("save source state: %w", err)
	}

	// State for the sources that completed is saved above; report the
	// interruption instead of rebuilding with a cancelled context.
	if err := ctx.Err(); err != nil {
		return fmt.Errorf("sync interrupted: %w", err)
	}

	if syncRebuild || updated > 0 {
		engine := search.NewEngine(cfg)
		if _, err := engine.Rebuild(ctx); err != nil {
			return fmt.Errorf("rebuild index: %w", err)
		}
	}

	fmt.Printf("\nSync summary: updated=%d skipped=%d failed=%d docs=%d\n", updated, skipped, failed, totalDocs)
	if failed > 0 {
		return fmt.Errorf("sync completed with failures")
	}
	if strings.TrimSpace(syncSource) != "" {
		fmt.Printf("Synced source: %s\n", syncSource)
	}
	return nil
}