I don't like commits

This commit is contained in:
Tomas Dvorak
2026-02-24 12:10:13 +01:00
parent 898a3c303f
commit 1d72a1cc01
109 changed files with 43586 additions and 8484 deletions
+42 -19
View File
@@ -5,8 +5,10 @@ import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io/fs"
"log"
"os"
"path/filepath"
"sort"
@@ -111,18 +113,19 @@ func (e *Engine) Rebuild(ctx context.Context) (*IndexStats, error) {
return nil, fmt.Errorf("docs directory is required")
}
if err := os.MkdirAll(e.IndexDir, 0o755); err != nil {
return nil, err
return nil, fmt.Errorf("create index dir %q: %w", e.IndexDir, err)
}
if err := os.MkdirAll(e.MetadataDir, 0o755); err != nil {
return nil, err
return nil, fmt.Errorf("create metadata dir %q: %w", e.MetadataDir, err)
}
docFiles, sourceHash, err := e.listDocFiles()
if err != nil {
return nil, err
return nil, fmt.Errorf("list docs for rebuild: %w", err)
}
docs := make([]indexedDoc, 0, len(docFiles))
parseErrors := make([]error, 0)
tokenCount := 0
for _, file := range docFiles {
select {
@@ -133,6 +136,9 @@ func (e *Engine) Rebuild(ctx context.Context) (*IndexStats, error) {
rd, err := parseDocFile(file)
if err != nil {
if len(parseErrors) < 20 {
parseErrors = append(parseErrors, fmt.Errorf("%s: %w", file, err))
}
continue
}
if strings.TrimSpace(rd.Content) == "" {
@@ -163,11 +169,17 @@ func (e *Engine) Rebuild(ctx context.Context) (*IndexStats, error) {
Length: length,
})
}
if len(parseErrors) > 0 {
log.Printf("search rebuild skipped %d files due to parse/read errors (sample: %v)", len(parseErrors), parseErrors[0])
if len(docFiles) > 0 && len(docs) == 0 {
return nil, fmt.Errorf("rebuild produced no indexable docs after parse failures: %w", errors.Join(parseErrors...))
}
}
index := persistedIndex{Version: indexVersion, BuiltAt: time.Now(), Docs: docs}
indexPath := filepath.Join(e.IndexDir, indexFileName)
if err := writeJSON(indexPath, index); err != nil {
return nil, err
return nil, fmt.Errorf("write lexical index: %w", err)
}
meta := persistedMeta{
@@ -179,7 +191,7 @@ func (e *Engine) Rebuild(ctx context.Context) (*IndexStats, error) {
}
metaPath := filepath.Join(e.MetadataDir, metaFileName)
if err := writeJSON(metaPath, meta); err != nil {
return nil, err
return nil, fmt.Errorf("write lexical metadata: %w", err)
}
return &IndexStats{
@@ -199,20 +211,28 @@ func (e *Engine) EnsureIndexed(ctx context.Context) (*IndexStats, error) {
if os.IsNotExist(err) {
return e.Rebuild(ctx)
}
return nil, err
return nil, fmt.Errorf("read index metadata %q: %w", metaPath, err)
}
var meta persistedMeta
if err := json.Unmarshal(b, &meta); err != nil {
return e.Rebuild(ctx)
stats, rebuildErr := e.Rebuild(ctx)
if rebuildErr != nil {
return nil, fmt.Errorf("rebuild after invalid metadata %q: %w", metaPath, rebuildErr)
}
return stats, nil
}
_, sourceHash, err := e.listDocFiles()
if err != nil {
return nil, err
return nil, fmt.Errorf("list docs for metadata check: %w", err)
}
if sourceHash != meta.SourceFileHash {
return e.Rebuild(ctx)
stats, rebuildErr := e.Rebuild(ctx)
if rebuildErr != nil {
return nil, fmt.Errorf("rebuild after source hash change: %w", rebuildErr)
}
return stats, nil
}
return &IndexStats{
@@ -232,18 +252,18 @@ func (e *Engine) Search(ctx context.Context, query string, opts SearchOptions) (
stats, err := e.EnsureIndexed(ctx)
if err != nil {
return nil, nil, err
return nil, nil, fmt.Errorf("ensure lexical index: %w", err)
}
indexPath := filepath.Join(e.IndexDir, indexFileName)
b, err := os.ReadFile(indexPath)
if err != nil {
return nil, nil, err
return nil, nil, fmt.Errorf("read lexical index %q: %w", indexPath, err)
}
var idx persistedIndex
if err := json.Unmarshal(b, &idx); err != nil {
return nil, nil, err
return nil, nil, fmt.Errorf("decode lexical index %q: %w", indexPath, err)
}
limit := opts.Limit
@@ -270,7 +290,7 @@ func (e *Engine) Search(ctx context.Context, query string, opts SearchOptions) (
for _, doc := range idx.Docs {
select {
case <-ctx.Done():
return nil, nil, ctx.Err()
return nil, nil, fmt.Errorf("search canceled: %w", ctx.Err())
default:
}
score := lexicalScore(qFreq, queryTokens, doc)
@@ -322,7 +342,7 @@ func (e *Engine) listDocFiles() ([]string, string, error) {
err := filepath.WalkDir(e.DocsDir, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
return fmt.Errorf("walk docs entry %q: %w", path, err)
}
if d.IsDir() {
return nil
@@ -336,7 +356,7 @@ func (e *Engine) listDocFiles() ([]string, string, error) {
info, statErr := d.Info()
if statErr != nil {
return statErr
return fmt.Errorf("stat docs file %q: %w", path, statErr)
}
files = append(files, path)
fmt.Fprintf(h, "%s|%d|%d\n", path, info.Size(), info.ModTime().UnixNano())
@@ -346,7 +366,7 @@ func (e *Engine) listDocFiles() ([]string, string, error) {
if os.IsNotExist(err) {
return []string{}, hashString("empty"), nil
}
return nil, "", err
return nil, "", fmt.Errorf("walk docs dir %q: %w", e.DocsDir, err)
}
sort.Strings(files)
@@ -356,7 +376,7 @@ func (e *Engine) listDocFiles() ([]string, string, error) {
func parseDocFile(path string) (*rawDoc, error) {
b, err := os.ReadFile(path)
if err != nil {
return nil, err
return nil, fmt.Errorf("read doc file %q: %w", path, err)
}
ext := strings.ToLower(filepath.Ext(path))
switch ext {
@@ -393,9 +413,12 @@ func markdownTitle(content string) string {
// writeJSON serializes v as indented JSON and writes the result to path with
// file mode 0o644.
//
// Both failure modes are returned wrapped with %w so the caller sees which
// step failed (and, for write failures, the target path) while errors.Is and
// errors.As can still reach the underlying cause.
func writeJSON(path string, v any) error {
	b, err := json.MarshalIndent(v, "", " ")
	if err != nil {
		return fmt.Errorf("marshal json payload: %w", err)
	}
	if err := os.WriteFile(path, b, 0o644); err != nil {
		return fmt.Errorf("write json file %q: %w", path, err)
	}
	return nil
}
func tokenize(input string) []string {