This commit is contained in:
Tomas Dvorak
2026-02-22 15:41:27 +01:00
parent 0b88627e54
commit 409acd2e08
84 changed files with 65382 additions and 27475 deletions
+67
View File
@@ -0,0 +1,67 @@
package scraper
import (
"context"
"fmt"
)
// ScraperConstructor is the factory signature stored in a ScraperRegistry:
// it receives a *Config and returns a Scraper.
type ScraperConstructor func(cfg *Config) Scraper
// ScraperRegistry maps each SourceType to the ScraperConstructor registered
// for it, so scrapers can be created without this package importing the
// packages that implement them.
type ScraperRegistry struct {
	// constructors holds one constructor per source type. Register writes to
	// it and Create reads from it.
	constructors map[SourceType]ScraperConstructor
}
// NewScraperRegistry returns an empty registry whose constructor map is
// already allocated.
func NewScraperRegistry() *ScraperRegistry {
	registry := new(ScraperRegistry)
	registry.constructors = map[SourceType]ScraperConstructor{}
	return registry
}
// Register associates constructor with sourceType, replacing any constructor
// previously stored for the same type.
//
// The constructor map is allocated on first use, so a zero-value
// ScraperRegistry (one not obtained from NewScraperRegistry) can be registered
// into instead of panicking on a write to a nil map.
//
// NOTE(review): no synchronization is performed here; presumably all
// registration happens before the registry is used concurrently — confirm.
func (r *ScraperRegistry) Register(sourceType SourceType, constructor ScraperConstructor) {
	if r.constructors == nil {
		r.constructors = make(map[SourceType]ScraperConstructor)
	}
	r.constructors[sourceType] = constructor
}
// Create builds a scraper for sourceType by calling the constructor registered
// for that type with config.
//
// It returns nil when no constructor is registered for sourceType. A nil
// constructor stored under sourceType is treated the same way, so the caller
// gets a nil Scraper rather than a panic from calling a nil function value.
func (r *ScraperRegistry) Create(sourceType SourceType, config *Config) Scraper {
	constructor, ok := r.constructors[sourceType]
	if !ok || constructor == nil {
		return nil
	}
	return constructor(config)
}
// globalRegistry is the package-level registry that RegisterScraper and
// CreateScraper delegate to. It is created once, when the package's variables
// are initialized.
var globalRegistry = NewScraperRegistry()
// RegisterScraper stores constructor under sourceType in the package-level
// registry by forwarding both arguments to globalRegistry.Register.
func RegisterScraper(sourceType SourceType, constructor ScraperConstructor) {
	globalRegistry.Register(sourceType, constructor)
}
// CreateScraper builds a scraper for sourceType from the package-level
// registry. It returns whatever globalRegistry.Create returns, which is nil
// when no constructor is registered for sourceType.
func CreateScraper(sourceType SourceType, config *Config) Scraper {
	return globalRegistry.Create(sourceType, config)
}
// FallbackScraper is a stand-in for use when no specific scraper is
// available. Its Scrape and DetectChanges methods only return errors.
type FallbackScraper struct {
	// config is the configuration passed to NewFallbackScraper. None of the
	// methods visible in this part of the file read it.
	config *Config
}
// NewFallbackScraper returns a FallbackScraper that holds on to config.
func NewFallbackScraper(config *Config) *FallbackScraper {
	fallback := new(FallbackScraper)
	fallback.config = config
	return fallback
}
// Scrape is a placeholder: it performs no scraping and always returns a nil
// document slice together with a non-nil error.
//
// For a non-nil source the error message names source.Type. A nil source
// produces its own error instead of a nil-pointer panic while the message is
// being built. ctx is accepted but not consulted.
func (f *FallbackScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
	if source == nil {
		return nil, fmt.Errorf("fallback scraper not implemented: source is nil")
	}
	return nil, fmt.Errorf("fallback scraper not implemented for source type: %s", source.Type)
}
// DetectChanges is a placeholder: it performs no change detection and always
// returns false, an empty hash string, and a non-nil error. lastHash and ctx
// are accepted but not consulted.
//
// For a non-nil source the error message names source.Type. A nil source
// produces its own error instead of a nil-pointer panic while the message is
// being built.
func (f *FallbackScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
	if source == nil {
		return false, "", fmt.Errorf("fallback scraper not implemented: source is nil")
	}
	return false, "", fmt.Errorf("fallback scraper not implemented for source type: %s", source.Type)
}