I don't like commits

This commit is contained in:
Tomas Dvorak
2026-02-24 12:10:13 +01:00
parent 898a3c303f
commit 1d72a1cc01
109 changed files with 43586 additions and 8484 deletions
+5 -4
View File
@@ -4,6 +4,7 @@ import (
"context"
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"net/url"
"path"
@@ -32,7 +33,7 @@ func (s *WebScraper) Scrape(ctx context.Context, source *Source) ([]*Document, e
scheduled := make(map[string]bool)
contentHashes := make(map[string]bool)
var mu sync.Mutex
var scrapeErrors []string
var scrapeErrors []error
// Parse base URL for domain restrictions
baseURL, err := url.Parse(source.URL)
@@ -102,7 +103,7 @@ func (s *WebScraper) Scrape(ctx context.Context, source *Source) ([]*Document, e
}
mu.Lock()
if len(scrapeErrors) < 20 {
scrapeErrors = append(scrapeErrors, fmt.Sprintf("%s: %v", reqURL, err))
scrapeErrors = append(scrapeErrors, fmt.Errorf("%s: %w", reqURL, err))
}
mu.Unlock()
})
@@ -236,7 +237,7 @@ func (s *WebScraper) Scrape(ctx context.Context, source *Source) ([]*Document, e
}
mu.Lock()
if len(scrapeErrors) < 20 {
scrapeErrors = append(scrapeErrors, fmt.Sprintf("%s: %v", absoluteURL, err))
scrapeErrors = append(scrapeErrors, fmt.Errorf("%s: %w", absoluteURL, err))
}
mu.Unlock()
}
@@ -256,7 +257,7 @@ func (s *WebScraper) Scrape(ctx context.Context, source *Source) ([]*Document, e
if len(documents) == 0 {
if len(scrapeErrors) > 0 {
return nil, fmt.Errorf("web scrape failed: %s", strings.Join(scrapeErrors, "; "))
return nil, fmt.Errorf("web scrape failed: %w", errors.Join(scrapeErrors...))
}
return nil, fmt.Errorf("web scrape extracted no documents from %s", source.URL)
}