package scraper import ( "context" "encoding/json" "net/http" "net/http/httptest" "net/url" "strings" "testing" "time" ) func TestLocalSearchScraperScrape(t *testing.T) { mux := http.NewServeMux() baseURL := "" mux.HandleFunc("/search", func(w http.ResponseWriter, r *http.Request) { if got := r.URL.Query().Get("q"); got != "go http client" { t.Fatalf("expected query go http client, got %q", got) } if got := r.URL.Query().Get("format"); got != "json" { t.Fatalf("expected format=json, got %q", got) } _ = json.NewEncoder(w).Encode(map[string]interface{}{ "results": []map[string]interface{}{ { "url": baseURL + "/docs/http-client", "title": "HTTP Client Guide", "content": "How to build an HTTP client in Go", "engine": "searxng", "score": 0.99, }, }, }) }) mux.HandleFunc("/docs/http-client", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "text/html") _, _ = w.Write([]byte(`HTTP Client Guide
` + strings.Repeat("http client docs ", 30) + `
`)) }) srv := httptest.NewServer(mux) defer srv.Close() baseURL = srv.URL s := NewLocalSearchScraper(&Config{ UserAgent: "DevourTest/1.0", Timeout: 2 * time.Second, }) docs, err := s.Scrape(context.Background(), &Source{ Name: "local-search", Type: SourceTypeLocalSearch, URL: srv.URL + "/search", Query: "go http client", ResultLimit: 5, }) if err != nil { t.Fatalf("unexpected scrape error: %v", err) } if len(docs) == 0 { t.Fatal("expected at least one document") } doc := docs[0] if doc.URL != srv.URL+"/docs/http-client" { t.Fatalf("unexpected document URL: %q", doc.URL) } if doc.Metadata["search_query"] != "go http client" { t.Fatalf("expected metadata search_query, got %v", doc.Metadata["search_query"]) } if doc.Metadata["search_engine"] != "searxng" { t.Fatalf("expected metadata search_engine=searxng, got %v", doc.Metadata["search_engine"]) } } func TestLocalSearchScraperDomainFilter(t *testing.T) { mux := http.NewServeMux() baseURL := "" mux.HandleFunc("/search", func(w http.ResponseWriter, r *http.Request) { _ = json.NewEncoder(w).Encode(map[string]interface{}{ "results": []map[string]interface{}{ { "url": baseURL + "/docs/in-scope", "title": "In Scope", }, { "url": "https://example.com/out-of-scope", "title": "Out Scope", }, }, }) }) mux.HandleFunc("/docs/in-scope", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "text/html") _, _ = w.Write([]byte(`In Scope
` + strings.Repeat("scoped docs ", 30) + `
`)) }) srv := httptest.NewServer(mux) defer srv.Close() baseURL = srv.URL parsed, err := url.Parse(srv.URL) if err != nil { t.Fatalf("failed to parse server URL: %v", err) } s := NewLocalSearchScraper(&Config{ UserAgent: "DevourTest/1.0", Timeout: 2 * time.Second, }) docs, err := s.Scrape(context.Background(), &Source{ Name: "local-search", Type: SourceTypeLocalSearch, URL: srv.URL + "/search", Query: "scope test", ResultLimit: 10, Domains: []string{parsed.Hostname()}, }) if err != nil { t.Fatalf("unexpected scrape error: %v", err) } if len(docs) == 0 { t.Fatal("expected at least one in-scope document") } for _, doc := range docs { docURL, parseErr := url.Parse(doc.URL) if parseErr != nil { t.Fatalf("invalid doc URL %q: %v", doc.URL, parseErr) } if docURL.Hostname() != parsed.Hostname() { t.Fatalf("expected only in-scope domain, got %q", doc.URL) } } } func TestLocalSearchScraperRequiresQuery(t *testing.T) { s := NewLocalSearchScraper(&Config{ UserAgent: "DevourTest/1.0", Timeout: 2 * time.Second, }) _, err := s.Scrape(context.Background(), &Source{ Name: "local-search", Type: SourceTypeLocalSearch, URL: "http://127.0.0.1:8080/search", }) if err == nil { t.Fatal("expected error when query is missing") } if !strings.Contains(err.Error(), "query") { t.Fatalf("unexpected error: %v", err) } } func TestLocalSearchScraperDetectChanges(t *testing.T) { mux := http.NewServeMux() baseURL := "" resultPath := "/docs/one" mux.HandleFunc("/search", func(w http.ResponseWriter, r *http.Request) { _ = json.NewEncoder(w).Encode(map[string]interface{}{ "results": []map[string]interface{}{ { "url": baseURL + resultPath, "title": "Versioned", "score": 1.0, }, }, }) }) srv := httptest.NewServer(mux) defer srv.Close() baseURL = srv.URL s := NewLocalSearchScraper(&Config{ UserAgent: "DevourTest/1.0", Timeout: 2 * time.Second, }) source := &Source{ Name: "local-search", Type: SourceTypeLocalSearch, URL: srv.URL + "/search", Query: "version test", ResultLimit: 3, } changed, hash1, err := s.DetectChanges(context.Background(), source, "") if err != nil { t.Fatalf("unexpected detect changes error: %v", err) } if !changed { t.Fatal("expected first detect changes call to report changed") } if hash1 == "" { t.Fatal("expected non-empty hash") } changed, hash2, err := s.DetectChanges(context.Background(), source, hash1) if err != nil { t.Fatalf("unexpected detect changes error: %v", err) } if changed { t.Fatal("expected unchanged results with identical hash") } if hash2 != hash1 { t.Fatalf("expected identical hash, got %q and %q", hash1, hash2) } resultPath = "/docs/two" changed, hash3, err := s.DetectChanges(context.Background(), source, hash1) if err != nil { t.Fatalf("unexpected detect changes error: %v", err) } if !changed { t.Fatal("expected changed results after search output changed") } if hash3 == hash1 { t.Fatal("expected hash to change") } }