mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-03 20:13:03 +00:00
203 lines
4.8 KiB
Go
203 lines
4.8 KiB
Go
package cmd
|
|
|
|
import (
|
|
"context"
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/spf13/cobra"
|
|
"github.com/yourorg/devour/internal/projectstate"
|
|
"github.com/yourorg/devour/internal/scraper"
|
|
"github.com/yourorg/devour/internal/search"
|
|
"github.com/yourorg/devour/internal/storage"
|
|
)
|
|
|
|
// syncCmd is the cobra command definition for `sync`; executing it calls
// runSync. The Long text documents the --source and --rebuild usages.
var syncCmd = &cobra.Command{
|
|
Use: "sync",
|
|
Short: "Synchronize with configured sources",
|
|
Long: `Fetch updates from all configured sources.
|
|
|
|
Checks each source for changes (using hash or timestamp comparison)
|
|
and updates the local docs + index accordingly.
|
|
|
|
Examples:
|
|
devour sync # Sync all sources
|
|
devour sync --source my-docs # Sync specific source
|
|
devour sync --rebuild # Full rebuild`,
|
|
RunE: runSync,
|
|
}
|
|
|
|
// Values bound to the sync command's flags in init and read by runSync.
var (
|
|
// syncSource holds --source/-s; when non-empty, runSync skips every
// configured source whose name differs from it.
syncSource string
|
|
// syncRebuild holds --rebuild; runSync then scrapes without running change
// detection and always rebuilds the search index afterwards.
syncRebuild bool
|
|
// syncForce holds --force/-f; runSync then scrapes without running change
// detection.
syncForce bool
|
|
)
|
|
|
|
func init() {
|
|
syncCmd.Flags().StringVarP(&syncSource, "source", "s", "", "sync specific source only")
|
|
syncCmd.Flags().BoolVar(&syncRebuild, "rebuild", false, "rebuild entire index")
|
|
syncCmd.Flags().BoolVarP(&syncForce, "force", "f", false, "force sync even if no changes detected")
|
|
}
|
|
|
|
func runSync(cmd *cobra.Command, args []string) error {
|
|
cfg, err := loadAppConfig()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if syncRebuild {
|
|
fmt.Println("🔄 Rebuilding local index from configured sources...")
|
|
} else {
|
|
fmt.Println("🔄 Syncing configured sources...")
|
|
}
|
|
|
|
if len(cfg.Sources) == 0 {
|
|
fmt.Println("No sources configured. Add sources in devour.yaml first.")
|
|
return nil
|
|
}
|
|
|
|
state, err := projectstate.LoadSourceState(cfg.Storage.MetadataDir)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
updated := 0
|
|
skipped := 0
|
|
failed := 0
|
|
totalDocs := 0
|
|
|
|
for _, srcCfg := range cfg.Sources {
|
|
if syncSource != "" && srcCfg.Name != syncSource {
|
|
continue
|
|
}
|
|
|
|
source := sourceFromConfig(srcCfg)
|
|
if source.Type == "" {
|
|
if source.URL != "" {
|
|
source.Type = detectSourceType(source.URL)
|
|
} else if source.Path != "" {
|
|
source.Type = scraper.SourceTypeLocal
|
|
}
|
|
}
|
|
if source.Name == "" {
|
|
source.Name = extractName(source.URL)
|
|
}
|
|
applySourceProfile(source)
|
|
|
|
fmt.Printf("\n• %s (%s)\n", source.Name, source.Type)
|
|
s := scraper.NewScraper(source.Type, toScraperConfig(cfg, 0))
|
|
if s == nil {
|
|
failed++
|
|
fmt.Printf(" ✗ unsupported source type: %s\n", source.Type)
|
|
continue
|
|
}
|
|
|
|
key := source.Name
|
|
if key == "" {
|
|
key = chooseSourceLabel(source)
|
|
}
|
|
lastHash := ""
|
|
if prev := state.Sources[key]; prev != nil {
|
|
lastHash = prev.Hash
|
|
}
|
|
|
|
needsUpdate := syncForce || syncRebuild
|
|
newHash := lastHash
|
|
if !needsUpdate {
|
|
changed, hash, detectErr := s.DetectChanges(context.Background(), source, lastHash)
|
|
if detectErr != nil {
|
|
fmt.Printf(" ⚠ change detection failed (%v), scraping anyway\n", detectErr)
|
|
needsUpdate = true
|
|
} else {
|
|
needsUpdate = changed
|
|
newHash = hash
|
|
}
|
|
}
|
|
|
|
if !needsUpdate {
|
|
skipped++
|
|
fmt.Println(" ✓ no changes")
|
|
continue
|
|
}
|
|
|
|
docs, scrapeErr := s.Scrape(context.Background(), source)
|
|
if scrapeErr != nil {
|
|
failed++
|
|
fmt.Printf(" ✗ scrape failed: %v\n", scrapeErr)
|
|
state.Sources[key] = &projectstate.SourceState{
|
|
Name: source.Name,
|
|
Type: string(source.Type),
|
|
URL: source.URL,
|
|
Hash: lastHash,
|
|
LastSync: time.Now(),
|
|
DocCount: 0,
|
|
LastError: scrapeErr.Error(),
|
|
}
|
|
continue
|
|
}
|
|
|
|
saved, saveErr := storage.SaveDocuments(docs, storage.SaveOptions{
|
|
Format: "json",
|
|
OutputDir: cfg.Storage.DocsDir,
|
|
AllowEmpty: false,
|
|
PrintWriter: nil,
|
|
})
|
|
if saveErr != nil {
|
|
failed++
|
|
fmt.Printf(" ✗ save failed: %v\n", saveErr)
|
|
continue
|
|
}
|
|
|
|
if newHash == "" {
|
|
h := sha256.New()
|
|
for _, d := range docs {
|
|
if d == nil {
|
|
continue
|
|
}
|
|
fmt.Fprintf(h, "%s|%s|%s\n", d.ID, d.Hash, d.URL)
|
|
}
|
|
newHash = hex.EncodeToString(h.Sum(nil))
|
|
}
|
|
|
|
state.Sources[key] = &projectstate.SourceState{
|
|
Name: source.Name,
|
|
Type: string(source.Type),
|
|
URL: source.URL,
|
|
Hash: newHash,
|
|
LastSync: time.Now(),
|
|
DocCount: saved.Count,
|
|
LastError: "",
|
|
}
|
|
|
|
updated++
|
|
totalDocs += saved.Count
|
|
fmt.Printf(" ✓ updated (%d docs)\n", saved.Count)
|
|
}
|
|
|
|
if err := projectstate.SaveSourceState(cfg.Storage.MetadataDir, state); err != nil {
|
|
return err
|
|
}
|
|
|
|
if syncRebuild || updated > 0 {
|
|
engine := search.NewEngine(cfg)
|
|
if _, err := engine.Rebuild(context.Background()); err != nil {
|
|
return fmt.Errorf("rebuild index: %w", err)
|
|
}
|
|
}
|
|
|
|
fmt.Printf("\nSync summary: updated=%d skipped=%d failed=%d docs=%d\n", updated, skipped, failed, totalDocs)
|
|
if failed > 0 {
|
|
return fmt.Errorf("sync completed with failures")
|
|
}
|
|
if syncSource != "" && updated == 0 && skipped == 0 && failed == 0 {
|
|
return fmt.Errorf("source %q not found in config", syncSource)
|
|
}
|
|
if strings.TrimSpace(syncSource) != "" {
|
|
fmt.Printf("Synced source: %s\n", syncSource)
|
|
}
|
|
return nil
|
|
}
|