mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-04 12:33:04 +00:00
i dont like commits
This commit is contained in:
@@ -4,8 +4,10 @@ import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
@@ -44,14 +46,15 @@ func (s *LocalScraper) Scrape(ctx context.Context, source *Source) ([]*Document,
|
||||
|
||||
info, err := os.Stat(root)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("stat local source root %q: %w", root, err)
|
||||
}
|
||||
|
||||
docs := make([]*Document, 0)
|
||||
nonFatalErrors := make([]error, 0)
|
||||
if !info.IsDir() {
|
||||
doc, err := s.fileToDocument(root, source)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("convert local source file %q: %w", root, err)
|
||||
}
|
||||
return []*Document{doc}, nil
|
||||
}
|
||||
@@ -89,13 +92,22 @@ func (s *LocalScraper) Scrape(ctx context.Context, source *Source) ([]*Document,
|
||||
|
||||
doc, err := s.fileToDocument(path, source)
|
||||
if err != nil {
|
||||
if len(nonFatalErrors) < 20 {
|
||||
nonFatalErrors = append(nonFatalErrors, fmt.Errorf("%s: %w", path, err))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
docs = append(docs, doc)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("walk local source root %q: %w", root, err)
|
||||
}
|
||||
if len(nonFatalErrors) > 0 {
|
||||
log.Printf("local scraper skipped %d files due to conversion errors (sample: %v)", len(nonFatalErrors), nonFatalErrors[0])
|
||||
if len(docs) == 0 {
|
||||
return nil, fmt.Errorf("local scrape failed while converting files: %w", errors.Join(nonFatalErrors...))
|
||||
}
|
||||
}
|
||||
|
||||
return docs, nil
|
||||
@@ -118,7 +130,7 @@ func (s *LocalScraper) DetectChanges(ctx context.Context, source *Source, lastHa
|
||||
h := sha256.New()
|
||||
err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("walk local source path %q: %w", path, err)
|
||||
}
|
||||
if d.IsDir() {
|
||||
name := d.Name()
|
||||
@@ -133,13 +145,13 @@ func (s *LocalScraper) DetectChanges(ctx context.Context, source *Source, lastHa
|
||||
|
||||
info, infoErr := d.Info()
|
||||
if infoErr != nil {
|
||||
return infoErr
|
||||
return fmt.Errorf("stat local source file %q: %w", path, infoErr)
|
||||
}
|
||||
fmt.Fprintf(h, "%s|%d|%d\n", path, info.Size(), info.ModTime().UnixNano())
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return false, "", err
|
||||
return false, "", fmt.Errorf("walk local source root %q for change detection: %w", root, err)
|
||||
}
|
||||
|
||||
hash := hex.EncodeToString(h.Sum(nil))
|
||||
@@ -149,7 +161,7 @@ func (s *LocalScraper) DetectChanges(ctx context.Context, source *Source, lastHa
|
||||
func (s *LocalScraper) fileToDocument(path string, source *Source) (*Document, error) {
|
||||
b, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("read local source file %q: %w", path, err)
|
||||
}
|
||||
|
||||
ext := strings.ToLower(filepath.Ext(path))
|
||||
|
||||
Reference in New Issue
Block a user