mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-03 20:13:03 +00:00
update
This commit is contained in:
+157
-17
@@ -1,9 +1,18 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/yourorg/devour/internal/projectstate"
|
||||
"github.com/yourorg/devour/internal/scraper"
|
||||
"github.com/yourorg/devour/internal/search"
|
||||
"github.com/yourorg/devour/internal/storage"
|
||||
)
|
||||
|
||||
var syncCmd = &cobra.Command{
|
||||
@@ -12,7 +21,7 @@ var syncCmd = &cobra.Command{
|
||||
Long: `Fetch updates from all configured sources.
|
||||
|
||||
Checks each source for changes (using hash or timestamp comparison)
|
||||
and updates the index accordingly.
|
||||
and updates the local docs + index accordingly.
|
||||
|
||||
Examples:
|
||||
devour sync # Sync all sources
|
||||
@@ -34,29 +43,160 @@ func init() {
|
||||
}
|
||||
|
||||
func runSync(cmd *cobra.Command, args []string) error {
|
||||
cfg, err := loadAppConfig()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if syncRebuild {
|
||||
fmt.Println("🔄 Rebuilding index from all sources...")
|
||||
fmt.Println("🔄 Rebuilding local index from configured sources...")
|
||||
} else {
|
||||
fmt.Println("🔄 Syncing with configured sources...")
|
||||
fmt.Println("🔄 Syncing configured sources...")
|
||||
}
|
||||
|
||||
if syncSource != "" {
|
||||
fmt.Printf(" Source: %s\n", syncSource)
|
||||
if len(cfg.Sources) == 0 {
|
||||
fmt.Println("No sources configured. Add sources in devour.yaml first.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO: Implement actual sync logic
|
||||
// 1. Load sources from config
|
||||
// 2. For each source:
|
||||
// a. Check for changes (hash/timestamp)
|
||||
// b. If changes detected or --force:
|
||||
// - Scrape updated content
|
||||
// - Re-generate embeddings
|
||||
// - Update index
|
||||
// 3. Update metadata
|
||||
state, err := projectstate.LoadSourceState(cfg.Storage.MetadataDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
fmt.Println("⚠️ Sync functionality not yet implemented")
|
||||
fmt.Println(" Configure sources in devour.yaml first")
|
||||
updated := 0
|
||||
skipped := 0
|
||||
failed := 0
|
||||
totalDocs := 0
|
||||
|
||||
for _, srcCfg := range cfg.Sources {
|
||||
if syncSource != "" && srcCfg.Name != syncSource {
|
||||
continue
|
||||
}
|
||||
|
||||
source := sourceFromConfig(srcCfg)
|
||||
if source.Type == "" {
|
||||
if source.URL != "" {
|
||||
source.Type = detectSourceType(source.URL)
|
||||
} else if source.Path != "" {
|
||||
source.Type = scraper.SourceTypeLocal
|
||||
}
|
||||
}
|
||||
if source.Name == "" {
|
||||
source.Name = extractName(source.URL)
|
||||
}
|
||||
applySourceProfile(source)
|
||||
|
||||
fmt.Printf("\n• %s (%s)\n", source.Name, source.Type)
|
||||
s := scraper.NewScraper(source.Type, toScraperConfig(cfg, 0))
|
||||
if s == nil {
|
||||
failed++
|
||||
fmt.Printf(" ✗ unsupported source type: %s\n", source.Type)
|
||||
continue
|
||||
}
|
||||
|
||||
key := source.Name
|
||||
if key == "" {
|
||||
key = chooseSourceLabel(source)
|
||||
}
|
||||
lastHash := ""
|
||||
if prev := state.Sources[key]; prev != nil {
|
||||
lastHash = prev.Hash
|
||||
}
|
||||
|
||||
needsUpdate := syncForce || syncRebuild
|
||||
newHash := lastHash
|
||||
if !needsUpdate {
|
||||
changed, hash, detectErr := s.DetectChanges(context.Background(), source, lastHash)
|
||||
if detectErr != nil {
|
||||
fmt.Printf(" ⚠ change detection failed (%v), scraping anyway\n", detectErr)
|
||||
needsUpdate = true
|
||||
} else {
|
||||
needsUpdate = changed
|
||||
newHash = hash
|
||||
}
|
||||
}
|
||||
|
||||
if !needsUpdate {
|
||||
skipped++
|
||||
fmt.Println(" ✓ no changes")
|
||||
continue
|
||||
}
|
||||
|
||||
docs, scrapeErr := s.Scrape(context.Background(), source)
|
||||
if scrapeErr != nil {
|
||||
failed++
|
||||
fmt.Printf(" ✗ scrape failed: %v\n", scrapeErr)
|
||||
state.Sources[key] = &projectstate.SourceState{
|
||||
Name: source.Name,
|
||||
Type: string(source.Type),
|
||||
URL: source.URL,
|
||||
Hash: lastHash,
|
||||
LastSync: time.Now(),
|
||||
DocCount: 0,
|
||||
LastError: scrapeErr.Error(),
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
saved, saveErr := storage.SaveDocuments(docs, storage.SaveOptions{
|
||||
Format: "json",
|
||||
OutputDir: cfg.Storage.DocsDir,
|
||||
AllowEmpty: false,
|
||||
PrintWriter: nil,
|
||||
})
|
||||
if saveErr != nil {
|
||||
failed++
|
||||
fmt.Printf(" ✗ save failed: %v\n", saveErr)
|
||||
continue
|
||||
}
|
||||
|
||||
if newHash == "" {
|
||||
h := sha256.New()
|
||||
for _, d := range docs {
|
||||
if d == nil {
|
||||
continue
|
||||
}
|
||||
fmt.Fprintf(h, "%s|%s|%s\n", d.ID, d.Hash, d.URL)
|
||||
}
|
||||
newHash = hex.EncodeToString(h.Sum(nil))
|
||||
}
|
||||
|
||||
state.Sources[key] = &projectstate.SourceState{
|
||||
Name: source.Name,
|
||||
Type: string(source.Type),
|
||||
URL: source.URL,
|
||||
Hash: newHash,
|
||||
LastSync: time.Now(),
|
||||
DocCount: saved.Count,
|
||||
LastError: "",
|
||||
}
|
||||
|
||||
updated++
|
||||
totalDocs += saved.Count
|
||||
fmt.Printf(" ✓ updated (%d docs)\n", saved.Count)
|
||||
}
|
||||
|
||||
if err := projectstate.SaveSourceState(cfg.Storage.MetadataDir, state); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if syncRebuild || updated > 0 {
|
||||
engine := search.NewEngine(cfg)
|
||||
if _, err := engine.Rebuild(context.Background()); err != nil {
|
||||
return fmt.Errorf("rebuild index: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("\nSync summary: updated=%d skipped=%d failed=%d docs=%d\n", updated, skipped, failed, totalDocs)
|
||||
if failed > 0 {
|
||||
return fmt.Errorf("sync completed with failures")
|
||||
}
|
||||
if syncSource != "" && updated == 0 && skipped == 0 && failed == 0 {
|
||||
return fmt.Errorf("source %q not found in config", syncSource)
|
||||
}
|
||||
if strings.TrimSpace(syncSource) != "" {
|
||||
fmt.Printf("Synced source: %s\n", syncSource)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user