mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-04 12:33:04 +00:00
291 lines
6.3 KiB
Go
291 lines
6.3 KiB
Go
// Package vector provides vector storage capabilities.
|
|
package vector
|
|
|
|
import (
|
|
"context"
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"sync"
|
|
)
|
|
|
|
// Config describes which vector store backend to build and how to set it up.
type Config struct {
	// Type names the backend. NewStore recognizes "memory" and "chromem";
	// any other value falls back to the in-memory store.
	Type string `yaml:"type"`

	// PersistDir is the directory used for persistence. An empty string
	// disables persistence in MemoryStore.
	PersistDir string `yaml:"persist_dir"`

	// SimilarityMetric names the similarity function. It is not consulted by
	// the code visible in this file; MemoryStore.Search always uses cosine
	// similarity.
	SimilarityMetric string `yaml:"similarity_metric"`
}
|
|
|
|
// Document is a single entry held by a Store.
type Document struct {
	// ID identifies the document. MemoryStore.Add derives one from Content
	// when it is empty.
	ID string

	// Content is the document text.
	Content string

	// Vector is the numeric vector compared against the query in Search.
	Vector []float32

	// Metadata carries caller-defined key/value data; MemoryStore.Search
	// passes it through to the matching SearchResult.
	Metadata map[string]any
}
|
|
|
|
// SearchResult is one hit returned by Store.Search.
type SearchResult struct {
	// ID is the ID of the matching document.
	ID string

	// Content is the text of the matching document.
	Content string

	// Score is the similarity between the query and the document; higher
	// means more similar.
	Score float64

	// Metadata is the metadata of the matching document.
	Metadata map[string]any
}
|
|
|
|
// scoredDoc pairs a document ID with the similarity score computed for it
// while a search is being ranked.
type scoredDoc struct {
	// id is the key of the document in the store.
	id string

	// score is the similarity score assigned to that document.
	score float64
}
|
|
|
|
// Store defines the interface for vector storage.
|
|
type Store interface {
|
|
// Add adds documents to the store.
|
|
Add(ctx context.Context, docs []*Document) error
|
|
|
|
// Search finds similar documents.
|
|
Search(ctx context.Context, vector []float32, limit int) ([]*SearchResult, error)
|
|
|
|
// Delete removes a document by ID.
|
|
Delete(ctx context.Context, id string) error
|
|
|
|
// Get retrieves a document by ID.
|
|
Get(ctx context.Context, id string) (*Document, error)
|
|
|
|
// Count returns the number of documents.
|
|
Count(ctx context.Context) (int, error)
|
|
|
|
// Clear removes all documents.
|
|
Clear(ctx context.Context) error
|
|
}
|
|
|
|
// NewStore creates a new vector store.
|
|
func NewStore(config *Config) Store {
|
|
switch config.Type {
|
|
case "memory":
|
|
return NewMemoryStore(config)
|
|
case "chromem":
|
|
return NewChromemStore(config)
|
|
default:
|
|
return NewMemoryStore(config)
|
|
}
|
|
}
|
|
|
|
// MemoryStore implements Store with in-memory storage.
|
|
type MemoryStore struct {
|
|
config *Config
|
|
mu sync.RWMutex
|
|
docs map[string]*Document
|
|
vectors [][]float32
|
|
}
|
|
|
|
// NewMemoryStore creates an in-memory vector store.
|
|
func NewMemoryStore(config *Config) *MemoryStore {
|
|
return &MemoryStore{
|
|
config: config,
|
|
docs: make(map[string]*Document),
|
|
}
|
|
}
|
|
|
|
// Add adds documents to the store.
|
|
func (s *MemoryStore) Add(ctx context.Context, docs []*Document) error {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
for _, doc := range docs {
|
|
if doc.ID == "" {
|
|
doc.ID = generateID(doc.Content)
|
|
}
|
|
s.docs[doc.ID] = doc
|
|
s.vectors = append(s.vectors, doc.Vector)
|
|
}
|
|
|
|
// Persist if directory is set
|
|
if s.config.PersistDir != "" {
|
|
return s.persist()
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Search finds similar documents using cosine similarity.
|
|
func (s *MemoryStore) Search(ctx context.Context, vector []float32, limit int) ([]*SearchResult, error) {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
|
|
var scored []*scoredDoc
|
|
|
|
for id, doc := range s.docs {
|
|
if doc.Vector != nil {
|
|
score := cosineSimilarity(vector, doc.Vector)
|
|
scored = append(scored, &scoredDoc{id: id, score: score})
|
|
}
|
|
}
|
|
|
|
sortByScore(scored)
|
|
|
|
results := make([]*SearchResult, 0, limit)
|
|
for i := 0; i < len(scored) && i < limit; i++ {
|
|
doc := s.docs[scored[i].id]
|
|
results = append(results, &SearchResult{
|
|
ID: doc.ID,
|
|
Content: doc.Content,
|
|
Score: scored[i].score,
|
|
Metadata: doc.Metadata,
|
|
})
|
|
}
|
|
|
|
return results, nil
|
|
}
|
|
|
|
// Delete removes a document by ID.
|
|
func (s *MemoryStore) Delete(ctx context.Context, id string) error {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
delete(s.docs, id)
|
|
|
|
// Rebuild vectors slice
|
|
s.vectors = nil
|
|
for _, doc := range s.docs {
|
|
s.vectors = append(s.vectors, doc.Vector)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Get retrieves a document by ID.
|
|
func (s *MemoryStore) Get(ctx context.Context, id string) (*Document, error) {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
|
|
doc, ok := s.docs[id]
|
|
if !ok {
|
|
return nil, fmt.Errorf("document not found: %s", id)
|
|
}
|
|
|
|
return doc, nil
|
|
}
|
|
|
|
// Count returns the number of documents.
|
|
func (s *MemoryStore) Count(ctx context.Context) (int, error) {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
|
|
return len(s.docs), nil
|
|
}
|
|
|
|
// Clear removes all documents.
|
|
func (s *MemoryStore) Clear(ctx context.Context) error {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
s.docs = make(map[string]*Document)
|
|
s.vectors = nil
|
|
|
|
return nil
|
|
}
|
|
|
|
// persist saves documents to disk.
|
|
func (s *MemoryStore) persist() error {
|
|
if s.config.PersistDir == "" {
|
|
return nil
|
|
}
|
|
|
|
// Create directory if needed
|
|
if err := os.MkdirAll(s.config.PersistDir, 0755); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Save documents
|
|
dataFile := filepath.Join(s.config.PersistDir, "documents.json")
|
|
// TODO: Implement actual JSON serialization
|
|
|
|
_ = dataFile // Placeholder
|
|
|
|
return nil
|
|
}
|
|
|
|
// ChromemStore wraps chromem-go for production use.
|
|
type ChromemStore struct {
|
|
config *Config
|
|
// TODO: Add chromem-go client
|
|
}
|
|
|
|
// NewChromemStore creates a chromem-backed store.
|
|
func NewChromemStore(config *Config) *ChromemStore {
|
|
return &ChromemStore{config: config}
|
|
}
|
|
|
|
func (s *ChromemStore) Add(ctx context.Context, docs []*Document) error {
|
|
// TODO: Implement with chromem-go
|
|
return fmt.Errorf("chromem store not implemented")
|
|
}
|
|
|
|
func (s *ChromemStore) Search(ctx context.Context, vector []float32, limit int) ([]*SearchResult, error) {
|
|
return nil, fmt.Errorf("chromem store not implemented")
|
|
}
|
|
|
|
func (s *ChromemStore) Delete(ctx context.Context, id string) error {
|
|
return fmt.Errorf("chromem store not implemented")
|
|
}
|
|
|
|
func (s *ChromemStore) Get(ctx context.Context, id string) (*Document, error) {
|
|
return nil, fmt.Errorf("chromem store not implemented")
|
|
}
|
|
|
|
func (s *ChromemStore) Count(ctx context.Context) (int, error) {
|
|
return 0, fmt.Errorf("chromem store not implemented")
|
|
}
|
|
|
|
func (s *ChromemStore) Clear(ctx context.Context) error {
|
|
return fmt.Errorf("chromem store not implemented")
|
|
}
|
|
|
|
// Helper functions
|
|
|
|
// generateID derives a document ID from content: the first 12 bytes of its
// SHA-256 digest, hex-encoded into a 24-character string.
func generateID(content string) string {
	const idBytes = 12

	digest := sha256.Sum256([]byte(content))
	encoded := make([]byte, hex.EncodedLen(idBytes))
	hex.Encode(encoded, digest[:idBytes])
	return string(encoded)
}
|
|
|
|
func cosineSimilarity(a, b []float32) float64 {
|
|
if len(a) != len(b) {
|
|
return 0
|
|
}
|
|
|
|
var dotProduct, normA, normB float64
|
|
for i := range a {
|
|
dotProduct += float64(a[i]) * float64(b[i])
|
|
normA += float64(a[i]) * float64(a[i])
|
|
normB += float64(b[i]) * float64(b[i])
|
|
}
|
|
|
|
if normA == 0 || normB == 0 {
|
|
return 0
|
|
}
|
|
|
|
return dotProduct / (sqrt(normA) * sqrt(normB))
|
|
}
|
|
|
|
// sqrt returns the square root of x, computed with Newton's method so that
// the math package is not needed.
//
// The iteration runs until it converges rather than for a fixed number of
// steps, so the result is accurate for very large and very small inputs too.
//
// Special cases: sqrt(0) is 0, the square root of a negative number
// (including -Inf) is NaN, and NaN and +Inf are returned unchanged.
func sqrt(x float64) float64 {
	if x == 0 {
		return 0
	}
	if x < 0 {
		// Produce NaN at run time; a constant 0/0 would not compile.
		var zero float64
		return zero / zero
	}
	if x-x != 0 {
		// Only NaN and +Inf fail x-x == 0. Iterating on them would never
		// terminate, so hand them back as they are.
		return x
	}

	// Start at or above the true root. From there Newton's sequence decreases
	// monotonically, so the first step that fails to decrease marks convergence.
	z := x
	if z < 1 {
		z = 1
	}
	for {
		// Same update as z - (z*z-x)/(2*z), written so z*z cannot overflow.
		next := (z + x/z) / 2
		if next >= z {
			return z
		}
		z = next
	}
}
|
|
|
|
func sortByScore(docs []*scoredDoc) {
|
|
for i := 1; i < len(docs); i++ {
|
|
for j := i; j > 0 && docs[j].score > docs[j-1].score; j-- {
|
|
docs[j], docs[j-1] = docs[j-1], docs[j]
|
|
}
|
|
}
|
|
}
|