# Mirror of https://github.com/Dvorinka/Devour.git (synced 2026-06-03 20:13:03 +00:00).
# Mirror listing metadata: 119 lines, 2.5 KiB, YAML.
---
# Devour Example Configuration
# Copy this file to devour.yaml and customize for your needs

# Configuration schema version.
version: 1

# Storage paths
# All four directories are relative paths under ./devour_data.
# NOTE(review): presumably resolved against the process working directory —
# confirm against the loader.
storage:
  docs_dir: ./devour_data/docs
  index_dir: ./devour_data/index
  metadata_dir: ./devour_data/metadata
  cache_dir: ./devour_data/cache

# Embedding settings
# (optional for lexical search; required for future embedding flows)
embeddings:
  provider: openai  # openai, mock
  model: text-embedding-3-small
  dimensions: 1536
  # Use environment variable. Quoted so the placeholder (or an empty
  # expansion) is always loaded as a string, never as null.
  api_key: "${OPENAI_API_KEY}"
  batch_size: 100

# Vector database (optional in current local-first pipeline)
vector_db:
  type: memory  # memory, chromem
  # NOTE(review): persist is enabled together with the "memory" backend —
  # confirm that this combination is honored by the selected backend.
  persist: true
  similarity_metric: cosine

# Scraping settings
scraper:
  # NOTE(review): the URL inside the user agent is a placeholder
  # ("yourorg") — replace it with your own project/contact URL.
  user_agent: "Devour/1.0 (+https://github.com/yourorg/devour)"
  timeout: 30s
  retry_count: 3
  retry_delay: 1s
  concurrency: 10
  rate_limit: 500ms
  max_depth: 3
  # Same path as storage.cache_dir in this example.
  cache_dir: ./devour_data/cache

# Scheduler
scheduler:
  enabled: true
  interval: 72h  # Every 3 days
  check_method: hash  # hash, timestamp
  on_startup: false

# Server settings
server:
  mode: local  # local, remote
  transport: stdio  # stdio, http
  # NOTE(review): port/host presumably only matter for the http transport —
  # confirm against the server implementation.
  port: 8080
  host: localhost

# Local lexical indexing defaults
indexing:
  enabled: true
  auto_reindex: true
  # NOTE(review): unit of snippet_length is not stated here (characters,
  # presumably) — confirm against the indexer.
  snippet_length: 220
  max_docs: 10000

# Verification defaults
verification:
  enabled: true
  timeout: 90s

# Example sources
# Each entry is a mapping with at least `name` and `type`; the remaining keys
# shown here vary by type (url, openapi, github, local, localsearch).
sources:
  # Web documentation
  - name: example-docs
    type: url
    url: https://docs.example.com
    # include/exclude entries are regular expressions (double-quoted, so
    # "\\." is a literal dot).
    include:
      - ".*\\.md"
      - ".*\\.html"
    exclude:
      - ".*/api/.*"
      - ".*/legacy/.*"
    schedule: 24h

  # OpenAPI specification
  - name: api-spec
    type: openapi
    url: https://api.example.com/openapi.json
    schedule: 168h  # Weekly

  # GitHub repository docs
  - name: github-repo
    type: github
    repo: org/repository
    branch: main
    include:
      - "(?i)(^|/)README\\.md$"
      - "(?i)(^|/)docs?/"

  # Local files
  - name: local-docs
    type: local
    path: ./docs
    include:
      - ".*\\.md"
      - ".*\\.txt"

  # Self-hosted search API (e.g. SearxNG) with no API key
  # NOTE(review): this URL uses port 8080, the same value as server.port in
  # this file — confirm the two do not collide when both run on one host.
  - name: local-searxng-go
    type: localsearch
    url: http://127.0.0.1:8080/search
    query: golang http client
    result_limit: 8
    domains:
      - pkg.go.dev
      - go.dev

  # New framework examples
  - name: nextjs-docs
    type: url
    url: https://nextjs.org/docs

  - name: express-docs
    type: url
    url: https://expressjs.com/en/guide/routing.html