mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-04 04:23:02 +00:00
i dont like commits
This commit is contained in:
+42
-19
@@ -5,8 +5,10 @@ import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
@@ -111,18 +113,19 @@ func (e *Engine) Rebuild(ctx context.Context) (*IndexStats, error) {
|
||||
return nil, fmt.Errorf("docs directory is required")
|
||||
}
|
||||
if err := os.MkdirAll(e.IndexDir, 0o755); err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("create index dir %q: %w", e.IndexDir, err)
|
||||
}
|
||||
if err := os.MkdirAll(e.MetadataDir, 0o755); err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("create metadata dir %q: %w", e.MetadataDir, err)
|
||||
}
|
||||
|
||||
docFiles, sourceHash, err := e.listDocFiles()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("list docs for rebuild: %w", err)
|
||||
}
|
||||
|
||||
docs := make([]indexedDoc, 0, len(docFiles))
|
||||
parseErrors := make([]error, 0)
|
||||
tokenCount := 0
|
||||
for _, file := range docFiles {
|
||||
select {
|
||||
@@ -133,6 +136,9 @@ func (e *Engine) Rebuild(ctx context.Context) (*IndexStats, error) {
|
||||
|
||||
rd, err := parseDocFile(file)
|
||||
if err != nil {
|
||||
if len(parseErrors) < 20 {
|
||||
parseErrors = append(parseErrors, fmt.Errorf("%s: %w", file, err))
|
||||
}
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(rd.Content) == "" {
|
||||
@@ -163,11 +169,17 @@ func (e *Engine) Rebuild(ctx context.Context) (*IndexStats, error) {
|
||||
Length: length,
|
||||
})
|
||||
}
|
||||
if len(parseErrors) > 0 {
|
||||
log.Printf("search rebuild skipped %d files due to parse/read errors (sample: %v)", len(parseErrors), parseErrors[0])
|
||||
if len(docFiles) > 0 && len(docs) == 0 {
|
||||
return nil, fmt.Errorf("rebuild produced no indexable docs after parse failures: %w", errors.Join(parseErrors...))
|
||||
}
|
||||
}
|
||||
|
||||
index := persistedIndex{Version: indexVersion, BuiltAt: time.Now(), Docs: docs}
|
||||
indexPath := filepath.Join(e.IndexDir, indexFileName)
|
||||
if err := writeJSON(indexPath, index); err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("write lexical index: %w", err)
|
||||
}
|
||||
|
||||
meta := persistedMeta{
|
||||
@@ -179,7 +191,7 @@ func (e *Engine) Rebuild(ctx context.Context) (*IndexStats, error) {
|
||||
}
|
||||
metaPath := filepath.Join(e.MetadataDir, metaFileName)
|
||||
if err := writeJSON(metaPath, meta); err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("write lexical metadata: %w", err)
|
||||
}
|
||||
|
||||
return &IndexStats{
|
||||
@@ -199,20 +211,28 @@ func (e *Engine) EnsureIndexed(ctx context.Context) (*IndexStats, error) {
|
||||
if os.IsNotExist(err) {
|
||||
return e.Rebuild(ctx)
|
||||
}
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("read index metadata %q: %w", metaPath, err)
|
||||
}
|
||||
|
||||
var meta persistedMeta
|
||||
if err := json.Unmarshal(b, &meta); err != nil {
|
||||
return e.Rebuild(ctx)
|
||||
stats, rebuildErr := e.Rebuild(ctx)
|
||||
if rebuildErr != nil {
|
||||
return nil, fmt.Errorf("rebuild after invalid metadata %q: %w", metaPath, rebuildErr)
|
||||
}
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
_, sourceHash, err := e.listDocFiles()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("list docs for metadata check: %w", err)
|
||||
}
|
||||
if sourceHash != meta.SourceFileHash {
|
||||
return e.Rebuild(ctx)
|
||||
stats, rebuildErr := e.Rebuild(ctx)
|
||||
if rebuildErr != nil {
|
||||
return nil, fmt.Errorf("rebuild after source hash change: %w", rebuildErr)
|
||||
}
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
return &IndexStats{
|
||||
@@ -232,18 +252,18 @@ func (e *Engine) Search(ctx context.Context, query string, opts SearchOptions) (
|
||||
|
||||
stats, err := e.EnsureIndexed(ctx)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
return nil, nil, fmt.Errorf("ensure lexical index: %w", err)
|
||||
}
|
||||
|
||||
indexPath := filepath.Join(e.IndexDir, indexFileName)
|
||||
b, err := os.ReadFile(indexPath)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
return nil, nil, fmt.Errorf("read lexical index %q: %w", indexPath, err)
|
||||
}
|
||||
|
||||
var idx persistedIndex
|
||||
if err := json.Unmarshal(b, &idx); err != nil {
|
||||
return nil, nil, err
|
||||
return nil, nil, fmt.Errorf("decode lexical index %q: %w", indexPath, err)
|
||||
}
|
||||
|
||||
limit := opts.Limit
|
||||
@@ -270,7 +290,7 @@ func (e *Engine) Search(ctx context.Context, query string, opts SearchOptions) (
|
||||
for _, doc := range idx.Docs {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, nil, ctx.Err()
|
||||
return nil, nil, fmt.Errorf("search canceled: %w", ctx.Err())
|
||||
default:
|
||||
}
|
||||
score := lexicalScore(qFreq, queryTokens, doc)
|
||||
@@ -322,7 +342,7 @@ func (e *Engine) listDocFiles() ([]string, string, error) {
|
||||
|
||||
err := filepath.WalkDir(e.DocsDir, func(path string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("walk docs entry %q: %w", path, err)
|
||||
}
|
||||
if d.IsDir() {
|
||||
return nil
|
||||
@@ -336,7 +356,7 @@ func (e *Engine) listDocFiles() ([]string, string, error) {
|
||||
|
||||
info, statErr := d.Info()
|
||||
if statErr != nil {
|
||||
return statErr
|
||||
return fmt.Errorf("stat docs file %q: %w", path, statErr)
|
||||
}
|
||||
files = append(files, path)
|
||||
fmt.Fprintf(h, "%s|%d|%d\n", path, info.Size(), info.ModTime().UnixNano())
|
||||
@@ -346,7 +366,7 @@ func (e *Engine) listDocFiles() ([]string, string, error) {
|
||||
if os.IsNotExist(err) {
|
||||
return []string{}, hashString("empty"), nil
|
||||
}
|
||||
return nil, "", err
|
||||
return nil, "", fmt.Errorf("walk docs dir %q: %w", e.DocsDir, err)
|
||||
}
|
||||
|
||||
sort.Strings(files)
|
||||
@@ -356,7 +376,7 @@ func (e *Engine) listDocFiles() ([]string, string, error) {
|
||||
func parseDocFile(path string) (*rawDoc, error) {
|
||||
b, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("read doc file %q: %w", path, err)
|
||||
}
|
||||
ext := strings.ToLower(filepath.Ext(path))
|
||||
switch ext {
|
||||
@@ -393,9 +413,12 @@ func markdownTitle(content string) string {
|
||||
func writeJSON(path string, v any) error {
|
||||
b, err := json.MarshalIndent(v, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("marshal json payload: %w", err)
|
||||
}
|
||||
return os.WriteFile(path, b, 0o644)
|
||||
if err := os.WriteFile(path, b, 0o644); err != nil {
|
||||
return fmt.Errorf("write json file %q: %w", path, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func tokenize(input string) []string {
|
||||
|
||||
Reference in New Issue
Block a user