mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-04 04:23:02 +00:00
i dont like commits
This commit is contained in:
+1
-23
@@ -5,7 +5,6 @@ import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -70,28 +69,7 @@ func (s *AstroDocsScraper) DetectChanges(ctx context.Context, source *Source, la
|
||||
}
|
||||
|
||||
func (s *AstroDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", s.config.UserAgent)
|
||||
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(body), nil
|
||||
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
|
||||
}
|
||||
|
||||
func (s *AstroDocsScraper) generateHash(content string) string {
|
||||
|
||||
+1
-23
@@ -5,7 +5,6 @@ import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -75,28 +74,7 @@ func (s *CloudflareDocsScraper) DetectChanges(ctx context.Context, source *Sourc
|
||||
}
|
||||
|
||||
func (s *CloudflareDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", s.config.UserAgent)
|
||||
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(body), nil
|
||||
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
|
||||
}
|
||||
|
||||
func (s *CloudflareDocsScraper) generateHash(content string) string {
|
||||
|
||||
+1
-23
@@ -5,7 +5,6 @@ import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -70,28 +69,7 @@ func (s *DockerDocsScraper) DetectChanges(ctx context.Context, source *Source, l
|
||||
}
|
||||
|
||||
func (s *DockerDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", s.config.UserAgent)
|
||||
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(body), nil
|
||||
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
|
||||
}
|
||||
|
||||
func (s *DockerDocsScraper) generateHash(content string) string {
|
||||
|
||||
Vendored
+1
-23
@@ -6,7 +6,6 @@ import (
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -91,28 +90,7 @@ func (s *GoDocsScraper) DetectChanges(ctx context.Context, source *Source, lastH
|
||||
}
|
||||
|
||||
func (s *GoDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", s.config.UserAgent)
|
||||
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(body), nil
|
||||
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
|
||||
}
|
||||
|
||||
func (s *GoDocsScraper) generateHash(content string) string {
|
||||
|
||||
+1
-23
@@ -5,7 +5,6 @@ import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -85,28 +84,7 @@ func (s *JavaDocsScraper) DetectChanges(ctx context.Context, source *Source, las
|
||||
}
|
||||
|
||||
func (s *JavaDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", s.config.UserAgent)
|
||||
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(body), nil
|
||||
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
|
||||
}
|
||||
|
||||
func (s *JavaDocsScraper) generateHash(content string) string {
|
||||
|
||||
Vendored
+1
-23
@@ -5,7 +5,6 @@ import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -80,28 +79,7 @@ func (s *MCPDocsScraper) DetectChanges(ctx context.Context, source *Source, last
|
||||
}
|
||||
|
||||
func (s *MCPDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", s.config.UserAgent)
|
||||
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(body), nil
|
||||
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
|
||||
}
|
||||
|
||||
func (s *MCPDocsScraper) generateHash(content string) string {
|
||||
|
||||
+1
-23
@@ -5,7 +5,6 @@ import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -90,28 +89,7 @@ func (s *NuxtDocsScraper) DetectChanges(ctx context.Context, source *Source, las
|
||||
}
|
||||
|
||||
func (s *NuxtDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", s.config.UserAgent)
|
||||
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(body), nil
|
||||
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
|
||||
}
|
||||
|
||||
func (s *NuxtDocsScraper) generateHash(content string) string {
|
||||
|
||||
+1
-23
@@ -5,7 +5,6 @@ import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -100,28 +99,7 @@ func (s *PythonDocsScraper) DetectChanges(ctx context.Context, source *Source, l
|
||||
}
|
||||
|
||||
func (s *PythonDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", s.config.UserAgent)
|
||||
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(body), nil
|
||||
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
|
||||
}
|
||||
|
||||
func (s *PythonDocsScraper) generateHash(content string) string {
|
||||
|
||||
+1
-23
@@ -5,7 +5,6 @@ import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -80,28 +79,7 @@ func (s *ReactDocsScraper) DetectChanges(ctx context.Context, source *Source, la
|
||||
}
|
||||
|
||||
func (s *ReactDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", s.config.UserAgent)
|
||||
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(body), nil
|
||||
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
|
||||
}
|
||||
|
||||
func (s *ReactDocsScraper) generateHash(content string) string {
|
||||
|
||||
+1
-23
@@ -6,7 +6,6 @@ import (
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -106,28 +105,7 @@ func (s *RustDocsScraper) DetectChanges(ctx context.Context, source *Source, las
|
||||
}
|
||||
|
||||
func (s *RustDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", s.config.UserAgent)
|
||||
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(body), nil
|
||||
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
|
||||
}
|
||||
|
||||
func (s *RustDocsScraper) generateHash(content string) string {
|
||||
|
||||
+1
-23
@@ -5,7 +5,6 @@ import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -80,28 +79,7 @@ func (s *SpringDocsScraper) DetectChanges(ctx context.Context, source *Source, l
|
||||
}
|
||||
|
||||
func (s *SpringDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", s.config.UserAgent)
|
||||
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(body), nil
|
||||
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
|
||||
}
|
||||
|
||||
func (s *SpringDocsScraper) generateHash(content string) string {
|
||||
|
||||
Vendored
+1
-23
@@ -5,7 +5,6 @@ import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -85,28 +84,7 @@ func (s *TSDocsScraper) DetectChanges(ctx context.Context, source *Source, lastH
|
||||
}
|
||||
|
||||
func (s *TSDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", s.config.UserAgent)
|
||||
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(body), nil
|
||||
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
|
||||
}
|
||||
|
||||
func (s *TSDocsScraper) generateHash(content string) string {
|
||||
|
||||
Vendored
+65
@@ -1,8 +1,14 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
basescraper "github.com/yourorg/devour/internal/scraper"
|
||||
)
|
||||
@@ -19,3 +25,62 @@ func generateDocID(urlStr string) string {
|
||||
hash := sha256.Sum256([]byte(urlStr))
|
||||
return hex.EncodeToString(hash[:12])
|
||||
}
|
||||
|
||||
func fetchExternalPage(ctx context.Context, client *http.Client, userAgent, targetURL string) (string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", targetURL, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
snippet, readErr := readErrorSnippet(resp.Body)
|
||||
if readErr != nil {
|
||||
return "", fmt.Errorf("GET %s returned HTTP %d and body read failed: %w", summarizeURL(targetURL), resp.StatusCode, readErr)
|
||||
}
|
||||
return "", fmt.Errorf("GET %s returned HTTP %d: %s", summarizeURL(targetURL), resp.StatusCode, snippet)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return string(body), nil
|
||||
}
|
||||
|
||||
func readErrorSnippet(body io.Reader) (string, error) {
|
||||
const maxErrorBodyBytes = 512
|
||||
data, err := io.ReadAll(io.LimitReader(body, maxErrorBodyBytes))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
msg := strings.TrimSpace(string(data))
|
||||
if msg == "" {
|
||||
return "<empty body>", nil
|
||||
}
|
||||
|
||||
return msg, nil
|
||||
}
|
||||
|
||||
func summarizeURL(rawURL string) string {
|
||||
parsedURL, err := url.Parse(rawURL)
|
||||
if err != nil || parsedURL.Host == "" {
|
||||
return rawURL
|
||||
}
|
||||
|
||||
path := parsedURL.EscapedPath()
|
||||
if path == "" {
|
||||
path = "/"
|
||||
}
|
||||
|
||||
return parsedURL.Host + path
|
||||
}
|
||||
|
||||
Vendored
+1
-23
@@ -5,7 +5,6 @@ import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -85,28 +84,7 @@ func (s *VueDocsScraper) DetectChanges(ctx context.Context, source *Source, last
|
||||
}
|
||||
|
||||
func (s *VueDocsScraper) fetchPage(ctx context.Context, url string) (string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", s.config.UserAgent)
|
||||
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(body), nil
|
||||
return fetchExternalPage(ctx, s.client, s.config.UserAgent, url)
|
||||
}
|
||||
|
||||
func (s *VueDocsScraper) generateHash(content string) string {
|
||||
|
||||
Reference in New Issue
Block a user