// Package scraper provides document scraping capabilities for various sources.
package scraper

import (
	"context"
	"strings"
	"time"
)

// SourceType identifies the kind of documentation source a Source refers to.
//
// It is a string type: the Type field of Source carries it under the YAML key
// "type", and NewScraper switches on it to choose a scraper implementation.
type SourceType string

// Source types declared by this package. The string literal on the right-hand
// side is the serialized form of each value.
const (
	// SourceTypeWeb is the only constant whose string value ("url") does not
	// mirror its identifier.
	SourceTypeWeb            SourceType = "url"
	SourceTypeGitHub         SourceType = "github"
	SourceTypeOpenAPI        SourceType = "openapi"
	SourceTypeLocal          SourceType = "local"
	SourceTypeGoDocs         SourceType = "godocs"
	SourceTypeRustDocs       SourceType = "rustdocs"
	SourceTypePythonDocs     SourceType = "pythondocs"
	SourceTypeJavaDocs       SourceType = "javadocs"
	SourceTypeSpringDocs     SourceType = "springdocs"
	SourceTypeSpringAIDocs   SourceType = "springaidocs"
	SourceTypeTSDocs         SourceType = "tsdocs"
	SourceTypeReactDocs      SourceType = "reactdocs"
	SourceTypeVueDocs        SourceType = "vuedocs"
	SourceTypeNuxtDocs       SourceType = "nuxtdocs"
	SourceTypeMCPDocs        SourceType = "mcpdocs"
	SourceTypeDockerDocs     SourceType = "dockerdocs"
	SourceTypeCloudflareDocs SourceType = "cloudflaredocs"
	SourceTypeAstroDocs      SourceType = "astrodocs"
)

// Source represents a documentation source to scrape.
//
// The struct tags define its YAML form: Name and Type are always emitted,
// while every other field is dropped from marshaled output when it is empty
// (omitempty). Type selects the scraper implementation via NewScraper.
//
// NOTE(review): which optional fields are honored for a given Type is decided
// by the individual scraper implementations, which are not visible in this
// file. Presumably URL is for web-style sources, Repo/Branch for GitHub
// sources, Path for local ones, Include/Exclude are filter patterns and
// Schedule is a refresh schedule — confirm against the scrapers.
type Source struct {
	Name     string     `yaml:"name"`
	Type     SourceType `yaml:"type"`
	URL      string     `yaml:"url,omitempty"`
	Repo     string     `yaml:"repo,omitempty"`
	Branch   string     `yaml:"branch,omitempty"`
	Path     string     `yaml:"path,omitempty"`
	Include  []string   `yaml:"include,omitempty"`
	Exclude  []string   `yaml:"exclude,omitempty"`
	Schedule string     `yaml:"schedule,omitempty"`
}

// Document represents a scraped document.
//
// The struct tags define its JSON form. Every field is always emitted except
// URL, which is omitted when empty. Metadata is a free-form map and, having no
// omitempty tag, serializes as null when it is nil.
//
// NOTE(review): Source and Type are plain strings here rather than Source /
// SourceType values; presumably they hold the source name and source type, and
// Hash is a content hash comparable with the lastHash argument of
// Scraper.DetectChanges — confirm against the scraper implementations.
type Document struct {
	ID        string                 `json:"id"`
	Source    string                 `json:"source"`
	Type      string                 `json:"type"`
	Title     string                 `json:"title"`
	Content   string                 `json:"content"`
	URL       string                 `json:"url,omitempty"`
	Metadata  map[string]interface{} `json:"metadata"`
	Hash      string                 `json:"hash"`
	Timestamp time.Time              `json:"timestamp"`
}

// Config holds scraper configuration.
//
// A *Config is passed unchanged by NewScraper to the constructor of whichever
// scraper it selects. The struct tags give the YAML key of each field; none is
// marked omitempty. Timeout, RetryDelay and RateLimit are time.Duration
// values; RetryCount, Concurrency and MaxDepth are plain ints.
//
// NOTE(review): defaults and the handling of zero values are not defined in
// this file — presumably each scraper applies its own; confirm before relying
// on a zero Config.
type Config struct {
	UserAgent   string        `yaml:"user_agent"`
	Timeout     time.Duration `yaml:"timeout"`
	RetryCount  int           `yaml:"retry_count"`
	RetryDelay  time.Duration `yaml:"retry_delay"`
	Concurrency int           `yaml:"concurrency"`
	RateLimit   time.Duration `yaml:"rate_limit"`
	MaxDepth    int           `yaml:"max_depth"`
	CacheDir    string        `yaml:"cache_dir"`
}

// Scraper defines the interface for document scrapers.
//
// NewScraper returns a value of this type for each source type it recognizes.
type Scraper interface {
	// Scrape fetches and parses documents from the source.
	// It returns the scraped documents, or an error.
	Scrape(ctx context.Context, source *Source) ([]*Document, error)

	// DetectChanges checks if the source has changed since last scrape.
	// lastHash is the value to compare against. The bool result reports
	// whether a change was detected.
	// NOTE(review): the string result is presumably the source's current
	// hash, to be passed as lastHash on the next call — confirm against the
	// implementations.
	DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error)
}

// NewScraper builds the Scraper that handles sourceType, passing config
// through to that scraper's constructor.
//
// The result is nil when sourceType matches none of the cases below, so
// callers must check it before use.
//
// NOTE(review): SourceTypeSpringAIDocs is declared as a constant but has no
// case here, so it currently yields nil — confirm whether that is intended.
func NewScraper(sourceType SourceType, config *Config) Scraper {
	// Stays nil for any source type without a matching case.
	var selected Scraper

	switch sourceType {
	case SourceTypeWeb:
		selected = NewWebScraper(config)
	case SourceTypeGitHub:
		selected = NewGitHubScraper(config)
	case SourceTypeOpenAPI:
		selected = NewOpenAPIScraper(config)
	case SourceTypeLocal:
		selected = NewLocalScraper(config)
	case SourceTypeGoDocs:
		selected = NewGoDocsScraper(config)
	case SourceTypeRustDocs:
		selected = NewRustDocsScraper(config)
	case SourceTypePythonDocs:
		selected = NewPythonDocsScraper(config)
	case SourceTypeJavaDocs:
		selected = NewJavaDocsScraper(config)
	case SourceTypeSpringDocs:
		selected = NewSpringDocsScraper(config)
	case SourceTypeTSDocs:
		selected = NewTSDocsScraper(config)
	case SourceTypeReactDocs:
		selected = NewReactDocsScraper(config)
	case SourceTypeVueDocs:
		selected = NewVueDocsScraper(config)
	case SourceTypeNuxtDocs:
		selected = NewNuxtDocsScraper(config)
	case SourceTypeMCPDocs:
		selected = NewMCPDocsScraper(config)
	case SourceTypeDockerDocs:
		selected = NewDockerDocsScraper(config)
	case SourceTypeCloudflareDocs:
		selected = NewCloudflareDocsScraper(config)
	case SourceTypeAstroDocs:
		selected = NewAstroDocsScraper(config)
	}

	return selected
}

// DetectSourceType determines the source type from a URL or path.
//
// Input that begins with an "http://" or "https://" scheme is reported as
// SourceTypeWeb. The scheme is matched case-insensitively, so "HTTPS://..."
// is recognized too. Everything else, including the empty string, is reported
// as SourceTypeLocal.
//
// The full scheme including "://" is required, so a local path that merely
// starts with the letters "http" (for example "httpdocs/index.md") is not
// mistaken for a URL.
func DetectSourceType(input string) SourceType {
	// TODO: Implement detection of the remaining source types.
	for _, scheme := range [...]string{"http://", "https://"} {
		// The length guard keeps the slice expression in range for short input.
		if len(input) >= len(scheme) && strings.EqualFold(input[:len(scheme)], scheme) {
			return SourceTypeWeb
		}
	}
	return SourceTypeLocal
}