// Package vector provides vector storage capabilities. package vector import ( "context" "crypto/sha256" "encoding/hex" "fmt" "os" "path/filepath" "sync" ) // Config holds vector store configuration. type Config struct { Type string `yaml:"type"` PersistDir string `yaml:"persist_dir"` SimilarityMetric string `yaml:"similarity_metric"` } // Document represents an indexed document. type Document struct { ID string Content string Vector []float32 Metadata map[string]any } // SearchResult represents a search result. type SearchResult struct { ID string Content string Score float64 Metadata map[string]any } type scoredDoc struct { id string score float64 } // Store defines the interface for vector storage. type Store interface { // Add adds documents to the store. Add(ctx context.Context, docs []*Document) error // Search finds similar documents. Search(ctx context.Context, vector []float32, limit int) ([]*SearchResult, error) // Delete removes a document by ID. Delete(ctx context.Context, id string) error // Get retrieves a document by ID. Get(ctx context.Context, id string) (*Document, error) // Count returns the number of documents. Count(ctx context.Context) (int, error) // Clear removes all documents. Clear(ctx context.Context) error } // NewStore creates a new vector store. func NewStore(config *Config) Store { switch config.Type { case "memory": return NewMemoryStore(config) case "chromem": return NewChromemStore(config) default: return NewMemoryStore(config) } } // MemoryStore implements Store with in-memory storage. type MemoryStore struct { config *Config mu sync.RWMutex docs map[string]*Document vectors [][]float32 } // NewMemoryStore creates an in-memory vector store. func NewMemoryStore(config *Config) *MemoryStore { return &MemoryStore{ config: config, docs: make(map[string]*Document), } } // Add adds documents to the store. func (s *MemoryStore) Add(ctx context.Context, docs []*Document) error { s.mu.Lock() defer s.mu.Unlock() for _, doc := range docs { if doc.ID == "" { doc.ID = generateID(doc.Content) } s.docs[doc.ID] = doc s.vectors = append(s.vectors, doc.Vector) } // Persist if directory is set if s.config.PersistDir != "" { return s.persist() } return nil } // Search finds similar documents using cosine similarity. func (s *MemoryStore) Search(ctx context.Context, vector []float32, limit int) ([]*SearchResult, error) { s.mu.RLock() defer s.mu.RUnlock() var scored []*scoredDoc for id, doc := range s.docs { if doc.Vector != nil { score := cosineSimilarity(vector, doc.Vector) scored = append(scored, &scoredDoc{id: id, score: score}) } } sortByScore(scored) results := make([]*SearchResult, 0, limit) for i := 0; i < len(scored) && i < limit; i++ { doc := s.docs[scored[i].id] results = append(results, &SearchResult{ ID: doc.ID, Content: doc.Content, Score: scored[i].score, Metadata: doc.Metadata, }) } return results, nil } // Delete removes a document by ID. func (s *MemoryStore) Delete(ctx context.Context, id string) error { s.mu.Lock() defer s.mu.Unlock() delete(s.docs, id) // Rebuild vectors slice s.vectors = nil for _, doc := range s.docs { s.vectors = append(s.vectors, doc.Vector) } return nil } // Get retrieves a document by ID. func (s *MemoryStore) Get(ctx context.Context, id string) (*Document, error) { s.mu.RLock() defer s.mu.RUnlock() doc, ok := s.docs[id] if !ok { return nil, fmt.Errorf("document not found: %s", id) } return doc, nil } // Count returns the number of documents. func (s *MemoryStore) Count(ctx context.Context) (int, error) { s.mu.RLock() defer s.mu.RUnlock() return len(s.docs), nil } // Clear removes all documents. func (s *MemoryStore) Clear(ctx context.Context) error { s.mu.Lock() defer s.mu.Unlock() s.docs = make(map[string]*Document) s.vectors = nil return nil } // persist saves documents to disk. func (s *MemoryStore) persist() error { if s.config.PersistDir == "" { return nil } // Create directory if needed if err := os.MkdirAll(s.config.PersistDir, 0755); err != nil { return err } // Save documents dataFile := filepath.Join(s.config.PersistDir, "documents.json") // TODO: Implement actual JSON serialization _ = dataFile // Placeholder return nil } // ChromemStore wraps chromem-go for production use. type ChromemStore struct { config *Config // TODO: Add chromem-go client } // NewChromemStore creates a chromem-backed store. func NewChromemStore(config *Config) *ChromemStore { return &ChromemStore{config: config} } func (s *ChromemStore) Add(ctx context.Context, docs []*Document) error { // TODO: Implement with chromem-go return fmt.Errorf("chromem store not implemented") } func (s *ChromemStore) Search(ctx context.Context, vector []float32, limit int) ([]*SearchResult, error) { return nil, fmt.Errorf("chromem store not implemented") } func (s *ChromemStore) Delete(ctx context.Context, id string) error { return fmt.Errorf("chromem store not implemented") } func (s *ChromemStore) Get(ctx context.Context, id string) (*Document, error) { return nil, fmt.Errorf("chromem store not implemented") } func (s *ChromemStore) Count(ctx context.Context) (int, error) { return 0, fmt.Errorf("chromem store not implemented") } func (s *ChromemStore) Clear(ctx context.Context) error { return fmt.Errorf("chromem store not implemented") } // Helper functions func generateID(content string) string { hash := sha256.Sum256([]byte(content)) return hex.EncodeToString(hash[:12]) } func cosineSimilarity(a, b []float32) float64 { if len(a) != len(b) { return 0 } var dotProduct, normA, normB float64 for i := range a { dotProduct += float64(a[i]) * float64(b[i]) normA += float64(a[i]) * float64(a[i]) normB += float64(b[i]) * float64(b[i]) } if normA == 0 || normB == 0 { return 0 } return dotProduct / (sqrt(normA) * sqrt(normB)) } func sqrt(x float64) float64 { // Simple Newton's method for square root if x == 0 { return 0 } z := x for i := 0; i < 10; i++ { z = z - (z*z-x)/(2*z) } return z } func sortByScore(docs []*scoredDoc) { for i := 1; i < len(docs); i++ { for j := i; j > 0 && docs[j].score > docs[j-1].score; j-- { docs[j], docs[j-1] = docs[j-1], docs[j] } } }