This commit is contained in:
Tomas Dvorak
2026-02-24 10:33:59 +01:00
parent 409acd2e08
commit 898a3c303f
1374 changed files with 290409 additions and 29187 deletions
+157 -17
View File
@@ -1,9 +1,18 @@
package cmd
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"strings"
"time"
"github.com/spf13/cobra"
"github.com/yourorg/devour/internal/projectstate"
"github.com/yourorg/devour/internal/scraper"
"github.com/yourorg/devour/internal/search"
"github.com/yourorg/devour/internal/storage"
)
var syncCmd = &cobra.Command{
@@ -12,7 +21,7 @@ var syncCmd = &cobra.Command{
Long: `Fetch updates from all configured sources.
Checks each source for changes (using hash or timestamp comparison)
and updates the index accordingly.
and updates the local docs + index accordingly.
Examples:
devour sync # Sync all sources
@@ -34,29 +43,160 @@ func init() {
}
func runSync(cmd *cobra.Command, args []string) error {
cfg, err := loadAppConfig()
if err != nil {
return err
}
if syncRebuild {
fmt.Println("🔄 Rebuilding index from all sources...")
fmt.Println("🔄 Rebuilding local index from configured sources...")
} else {
fmt.Println("🔄 Syncing with configured sources...")
fmt.Println("🔄 Syncing configured sources...")
}
if syncSource != "" {
fmt.Printf(" Source: %s\n", syncSource)
if len(cfg.Sources) == 0 {
fmt.Println("No sources configured. Add sources in devour.yaml first.")
return nil
}
// TODO: Implement actual sync logic
// 1. Load sources from config
// 2. For each source:
// a. Check for changes (hash/timestamp)
// b. If changes detected or --force:
// - Scrape updated content
// - Re-generate embeddings
// - Update index
// 3. Update metadata
state, err := projectstate.LoadSourceState(cfg.Storage.MetadataDir)
if err != nil {
return err
}
fmt.Println()
fmt.Println("⚠️ Sync functionality not yet implemented")
fmt.Println(" Configure sources in devour.yaml first")
updated := 0
skipped := 0
failed := 0
totalDocs := 0
for _, srcCfg := range cfg.Sources {
if syncSource != "" && srcCfg.Name != syncSource {
continue
}
source := sourceFromConfig(srcCfg)
if source.Type == "" {
if source.URL != "" {
source.Type = detectSourceType(source.URL)
} else if source.Path != "" {
source.Type = scraper.SourceTypeLocal
}
}
if source.Name == "" {
source.Name = extractName(source.URL)
}
applySourceProfile(source)
fmt.Printf("\n• %s (%s)\n", source.Name, source.Type)
s := scraper.NewScraper(source.Type, toScraperConfig(cfg, 0))
if s == nil {
failed++
fmt.Printf(" ✗ unsupported source type: %s\n", source.Type)
continue
}
key := source.Name
if key == "" {
key = chooseSourceLabel(source)
}
lastHash := ""
if prev := state.Sources[key]; prev != nil {
lastHash = prev.Hash
}
needsUpdate := syncForce || syncRebuild
newHash := lastHash
if !needsUpdate {
changed, hash, detectErr := s.DetectChanges(context.Background(), source, lastHash)
if detectErr != nil {
fmt.Printf(" ⚠ change detection failed (%v), scraping anyway\n", detectErr)
needsUpdate = true
} else {
needsUpdate = changed
newHash = hash
}
}
if !needsUpdate {
skipped++
fmt.Println(" ✓ no changes")
continue
}
docs, scrapeErr := s.Scrape(context.Background(), source)
if scrapeErr != nil {
failed++
fmt.Printf(" ✗ scrape failed: %v\n", scrapeErr)
state.Sources[key] = &projectstate.SourceState{
Name: source.Name,
Type: string(source.Type),
URL: source.URL,
Hash: lastHash,
LastSync: time.Now(),
DocCount: 0,
LastError: scrapeErr.Error(),
}
continue
}
saved, saveErr := storage.SaveDocuments(docs, storage.SaveOptions{
Format: "json",
OutputDir: cfg.Storage.DocsDir,
AllowEmpty: false,
PrintWriter: nil,
})
if saveErr != nil {
failed++
fmt.Printf(" ✗ save failed: %v\n", saveErr)
continue
}
if newHash == "" {
h := sha256.New()
for _, d := range docs {
if d == nil {
continue
}
fmt.Fprintf(h, "%s|%s|%s\n", d.ID, d.Hash, d.URL)
}
newHash = hex.EncodeToString(h.Sum(nil))
}
state.Sources[key] = &projectstate.SourceState{
Name: source.Name,
Type: string(source.Type),
URL: source.URL,
Hash: newHash,
LastSync: time.Now(),
DocCount: saved.Count,
LastError: "",
}
updated++
totalDocs += saved.Count
fmt.Printf(" ✓ updated (%d docs)\n", saved.Count)
}
if err := projectstate.SaveSourceState(cfg.Storage.MetadataDir, state); err != nil {
return err
}
if syncRebuild || updated > 0 {
engine := search.NewEngine(cfg)
if _, err := engine.Rebuild(context.Background()); err != nil {
return fmt.Errorf("rebuild index: %w", err)
}
}
fmt.Printf("\nSync summary: updated=%d skipped=%d failed=%d docs=%d\n", updated, skipped, failed, totalDocs)
if failed > 0 {
return fmt.Errorf("sync completed with failures")
}
if syncSource != "" && updated == 0 && skipped == 0 && failed == 0 {
return fmt.Errorf("source %q not found in config", syncSource)
}
if strings.TrimSpace(syncSource) != "" {
fmt.Printf("Synced source: %s\n", syncSource)
}
return nil
}