mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-04 04:23:02 +00:00
68 lines
2.1 KiB
Go
68 lines
2.1 KiB
Go
package scraper
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
)
|
|
|
|
// ScraperConstructor defines a function that creates a scraper
|
|
type ScraperConstructor func(*Config) Scraper
|
|
|
|
// ScraperRegistry manages scraper constructors without importing them
|
|
type ScraperRegistry struct {
|
|
constructors map[SourceType]ScraperConstructor
|
|
}
|
|
|
|
// NewScraperRegistry creates a new registry
|
|
func NewScraperRegistry() *ScraperRegistry {
|
|
return &ScraperRegistry{
|
|
constructors: make(map[SourceType]ScraperConstructor),
|
|
}
|
|
}
|
|
|
|
// Register registers a scraper constructor
|
|
func (r *ScraperRegistry) Register(sourceType SourceType, constructor ScraperConstructor) {
|
|
r.constructors[sourceType] = constructor
|
|
}
|
|
|
|
// Create creates a scraper instance
|
|
func (r *ScraperRegistry) Create(sourceType SourceType, config *Config) Scraper {
|
|
if constructor, exists := r.constructors[sourceType]; exists {
|
|
return wrapScraper(constructor(config))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Global registry
|
|
var globalRegistry = NewScraperRegistry()
|
|
|
|
// RegisterScraper registers a scraper globally
|
|
func RegisterScraper(sourceType SourceType, constructor ScraperConstructor) {
|
|
globalRegistry.Register(sourceType, constructor)
|
|
}
|
|
|
|
// CreateScraper creates a scraper using the global registry
|
|
func CreateScraper(sourceType SourceType, config *Config) Scraper {
|
|
return globalRegistry.Create(sourceType, config)
|
|
}
|
|
|
|
// FallbackScraper provides basic functionality when specific scrapers aren't available
|
|
type FallbackScraper struct {
|
|
config *Config
|
|
}
|
|
|
|
// NewFallbackScraper creates a fallback scraper
|
|
func NewFallbackScraper(config *Config) *FallbackScraper {
|
|
return &FallbackScraper{config: config}
|
|
}
|
|
|
|
// Scrape implements basic scraping functionality
|
|
func (f *FallbackScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
|
|
return nil, fmt.Errorf("fallback scraper not implemented for source type: %s", source.Type)
|
|
}
|
|
|
|
// DetectChanges implements basic change detection
|
|
func (f *FallbackScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
|
|
return false, "", fmt.Errorf("fallback scraper not implemented for source type: %s", source.Type)
|
|
}
|