i dont like commits

This commit is contained in:
Tomas Dvorak
2026-02-24 12:10:13 +01:00
parent 898a3c303f
commit 1d72a1cc01
109 changed files with 43586 additions and 8484 deletions
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -70,28 +69,7 @@ func (s *AstroDocsScraper) DetectChanges(ctx context.Context, source *Source, la
}
func (s *AstroDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *AstroDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -75,28 +74,7 @@ func (s *CloudflareDocsScraper) DetectChanges(ctx context.Context, source *Sourc
}
func (s *CloudflareDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *CloudflareDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -70,28 +69,7 @@ func (s *DockerDocsScraper) DetectChanges(ctx context.Context, source *Source, l
}
func (s *DockerDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *DockerDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -6,7 +6,6 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -91,28 +90,7 @@ func (s *GoDocsScraper) DetectChanges(ctx context.Context, source *Source, lastH
}
func (s *GoDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *GoDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -85,28 +84,7 @@ func (s *JavaDocsScraper) DetectChanges(ctx context.Context, source *Source, las
}
func (s *JavaDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *JavaDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -80,28 +79,7 @@ func (s *MCPDocsScraper) DetectChanges(ctx context.Context, source *Source, last
}
func (s *MCPDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *MCPDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -90,28 +89,7 @@ func (s *NuxtDocsScraper) DetectChanges(ctx context.Context, source *Source, las
}
func (s *NuxtDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *NuxtDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -100,28 +99,7 @@ func (s *PythonDocsScraper) DetectChanges(ctx context.Context, source *Source, l
}
func (s *PythonDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *PythonDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -80,28 +79,7 @@ func (s *ReactDocsScraper) DetectChanges(ctx context.Context, source *Source, la
}
func (s *ReactDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *ReactDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -6,7 +6,6 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -106,28 +105,7 @@ func (s *RustDocsScraper) DetectChanges(ctx context.Context, source *Source, las
}
func (s *RustDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *RustDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -80,28 +79,7 @@ func (s *SpringDocsScraper) DetectChanges(ctx context.Context, source *Source, l
}
func (s *SpringDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *SpringDocsScraper) generateHash(content string) string {
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -85,28 +84,7 @@ func (s *TSDocsScraper) DetectChanges(ctx context.Context, source *Source, lastH
}
func (s *TSDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *TSDocsScraper) generateHash(content string) string {
+65
View File
@@ -1,8 +1,14 @@
package scraper
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"net/url"
"strings"
basescraper "github.com/yourorg/devour/internal/scraper"
)
@@ -19,3 +25,62 @@ func generateDocID(urlStr string) string {
hash := sha256.Sum256([]byte(urlStr))
return hex.EncodeToString(hash[:12])
}
func fetchExternalPage(ctx context.Context, client *http.Client, userAgent, targetURL string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", targetURL, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", userAgent)
resp, err := client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
snippet, readErr := readErrorSnippet(resp.Body)
if readErr != nil {
return "", fmt.Errorf("GET %s returned HTTP %d and body read failed: %w", summarizeURL(targetURL), resp.StatusCode, readErr)
}
return "", fmt.Errorf("GET %s returned HTTP %d: %s", summarizeURL(targetURL), resp.StatusCode, snippet)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
}
func readErrorSnippet(body io.Reader) (string, error) {
const maxErrorBodyBytes = 512
data, err := io.ReadAll(io.LimitReader(body, maxErrorBodyBytes))
if err != nil {
return "", err
}
msg := strings.TrimSpace(string(data))
if msg == "" {
return "<empty body>", nil
}
return msg, nil
}
func summarizeURL(rawURL string) string {
parsedURL, err := url.Parse(rawURL)
if err != nil || parsedURL.Host == "" {
return rawURL
}
path := parsedURL.EscapedPath()
if path == "" {
path = "/"
}
return parsedURL.Host + path
}
+1 -23
View File
@@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -85,28 +84,7 @@ func (s *VueDocsScraper) DetectChanges(ctx context.Context, source *Source, last
}
func (s *VueDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", s.config.UserAgent)
resp, err := s.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
}
func (s *VueDocsScraper) generateHash(content string) string {