package cmd import ( "context" "crypto/sha256" "encoding/hex" "fmt" "strings" "time" "github.com/spf13/cobra" "github.com/yourorg/devour/internal/projectstate" "github.com/yourorg/devour/internal/scraper" "github.com/yourorg/devour/internal/search" "github.com/yourorg/devour/internal/storage" ) var syncCmd = &cobra.Command{ Use: "sync", Short: "Synchronize with configured sources", Long: `Fetch updates from all configured sources. Checks each source for changes (using hash or timestamp comparison) and updates the local docs + index accordingly. Examples: devour sync # Sync all sources devour sync --source my-docs # Sync specific source devour sync --rebuild # Full rebuild`, RunE: runSync, } var ( syncSource string syncRebuild bool syncForce bool ) func init() { syncCmd.Flags().StringVarP(&syncSource, "source", "s", "", "sync specific source only") syncCmd.Flags().BoolVar(&syncRebuild, "rebuild", false, "rebuild entire index") syncCmd.Flags().BoolVarP(&syncForce, "force", "f", false, "force sync even if no changes detected") } func runSync(cmd *cobra.Command, args []string) error { cfg, err := loadAppConfig() if err != nil { return err } if syncRebuild { fmt.Println("šŸ”„ Rebuilding local index from configured sources...") } else { fmt.Println("šŸ”„ Syncing configured sources...") } if len(cfg.Sources) == 0 { fmt.Println("No sources configured. Add sources in devour.yaml first.") return nil } state, err := projectstate.LoadSourceState(cfg.Storage.MetadataDir) if err != nil { return err } updated := 0 skipped := 0 failed := 0 totalDocs := 0 for _, srcCfg := range cfg.Sources { if syncSource != "" && srcCfg.Name != syncSource { continue } source := sourceFromConfig(srcCfg) if source.Type == "" { if source.URL != "" { source.Type = detectSourceType(source.URL) } else if source.Path != "" { source.Type = scraper.SourceTypeLocal } } if source.Name == "" { source.Name = extractName(source.URL) } applySourceProfile(source) fmt.Printf("\n• %s (%s)\n", source.Name, source.Type) s := scraper.NewScraper(source.Type, toScraperConfig(cfg, 0)) if s == nil { failed++ fmt.Printf(" āœ— unsupported source type: %s\n", source.Type) continue } key := source.Name if key == "" { key = chooseSourceLabel(source) } lastHash := "" if prev := state.Sources[key]; prev != nil { lastHash = prev.Hash } needsUpdate := syncForce || syncRebuild newHash := lastHash if !needsUpdate { changed, hash, detectErr := s.DetectChanges(context.Background(), source, lastHash) if detectErr != nil { fmt.Printf(" ⚠ change detection failed (%v), scraping anyway\n", detectErr) needsUpdate = true } else { needsUpdate = changed newHash = hash } } if !needsUpdate { skipped++ fmt.Println(" āœ“ no changes") continue } docs, scrapeErr := s.Scrape(context.Background(), source) if scrapeErr != nil { failed++ fmt.Printf(" āœ— scrape failed: %v\n", scrapeErr) state.Sources[key] = &projectstate.SourceState{ Name: source.Name, Type: string(source.Type), URL: source.URL, Hash: lastHash, LastSync: time.Now(), DocCount: 0, LastError: scrapeErr.Error(), } continue } saved, saveErr := storage.SaveDocuments(docs, storage.SaveOptions{ Format: "json", OutputDir: cfg.Storage.DocsDir, AllowEmpty: false, PrintWriter: nil, }) if saveErr != nil { failed++ fmt.Printf(" āœ— save failed: %v\n", saveErr) continue } if newHash == "" { h := sha256.New() for _, d := range docs { if d == nil { continue } fmt.Fprintf(h, "%s|%s|%s\n", d.ID, d.Hash, d.URL) } newHash = hex.EncodeToString(h.Sum(nil)) } state.Sources[key] = &projectstate.SourceState{ Name: source.Name, Type: string(source.Type), URL: source.URL, Hash: newHash, LastSync: time.Now(), DocCount: saved.Count, LastError: "", } updated++ totalDocs += saved.Count fmt.Printf(" āœ“ updated (%d docs)\n", saved.Count) } if err := projectstate.SaveSourceState(cfg.Storage.MetadataDir, state); err != nil { return err } if syncRebuild || updated > 0 { engine := search.NewEngine(cfg) if _, err := engine.Rebuild(context.Background()); err != nil { return fmt.Errorf("rebuild index: %w", err) } } fmt.Printf("\nSync summary: updated=%d skipped=%d failed=%d docs=%d\n", updated, skipped, failed, totalDocs) if failed > 0 { return fmt.Errorf("sync completed with failures") } if syncSource != "" && updated == 0 && skipped == 0 && failed == 0 { return fmt.Errorf("source %q not found in config", syncSource) } if strings.TrimSpace(syncSource) != "" { fmt.Printf("Synced source: %s\n", syncSource) } return nil }