---
# Devour Example Configuration
# Copy this file to devour.yaml and customize for your needs.

version: 1

# Storage paths
storage:
  docs_dir: ./devour_data/docs
  index_dir: ./devour_data/index
  metadata_dir: ./devour_data/metadata

# Embedding settings
embeddings:
  provider: openai  # openai, mock
  model: text-embedding-3-small
  dimensions: 1536
  # Use environment variable. Quoted so the placeholder is always read as a
  # string scalar by the YAML parser.
  api_key: "${OPENAI_API_KEY}"
  batch_size: 100

# Vector database
vector_db:
  type: memory  # memory, chromem
  persist: true
  similarity_metric: cosine

# Scraping settings
scraper:
  user_agent: "Devour/1.0 (+https://github.com/yourorg/devour)"
  timeout: 30s
  retry_count: 3
  retry_delay: 5s
  concurrency: 10
  rate_limit: 500ms
  max_depth: 3
  cache_dir: ./devour_data/cache

# Scheduler
scheduler:
  enabled: true
  interval: 72h  # Every 3 days
  check_method: hash  # hash, timestamp
  on_startup: false

# Server settings
server:
  mode: local  # local, remote
  port: 8080
  host: localhost

# Example sources
sources:
  # Web documentation
  - name: example-docs
    type: url
    url: https://docs.example.com
    include:
      - ".*\\.md"
      - ".*\\.html"
    exclude:
      - ".*/api/.*"
      - ".*/legacy/.*"
    schedule: 24h

  # OpenAPI specification
  - name: api-spec
    type: openapi
    url: https://api.example.com/openapi.json
    schedule: 168h  # Weekly

  # GitHub repository
  - name: github-repo
    type: github
    repo: org/repository
    branch: main
    include:
      - "docs/.*"
      - "README.md"
    exclude:
      - "docs/internal/.*"
    # auth_token: ${GITHUB_TOKEN}  # Optional for private repos

  # Local files
  - name: local-docs
    type: local
    path: ./docs
    include:
      - ".*\\.md"
      - ".*\\.txt"