mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-03 20:13:03 +00:00
first commit
This commit is contained in:
@@ -0,0 +1,88 @@
|
||||
# Devour Example Configuration
# Copy this file to devour.yaml and customize for your needs.

# Presumably the version of this configuration format; confirm against the loader.
version: 1

# Storage paths
# In this example all three directories sit under ./devour_data.
storage:
  docs_dir: ./devour_data/docs
  index_dir: ./devour_data/index
  metadata_dir: ./devour_data/metadata

# Embedding settings
embeddings:
  provider: openai  # openai, mock
  model: text-embedding-3-small
  dimensions: 1536
  # Placeholder for an environment variable so the key is not committed.
  # Quoted so the value is always loaded as a string.
  api_key: "${OPENAI_API_KEY}"  # Use environment variable
  batch_size: 100

# Vector database
vector_db:
  type: memory  # memory, chromem
  # NOTE(review): persist is enabled together with the memory backend;
  # confirm how persistence applies when type is "memory".
  persist: true
  similarity_metric: cosine

# Scraping settings
scraper:
  # NOTE(review): "yourorg" looks like a placeholder; point this at your own
  # project URL before use.
  user_agent: "Devour/1.0 (+https://github.com/yourorg/devour)"
  timeout: 30s
  retry_count: 3
  retry_delay: 5s
  concurrency: 10
  rate_limit: 500ms
  max_depth: 3
  cache_dir: ./devour_data/cache

# Scheduler
scheduler:
  enabled: true
  interval: 72h  # Every 3 days
  check_method: hash  # hash, timestamp
  on_startup: false

# Server settings
server:
  mode: local  # local, remote
  port: 8080
  host: localhost

# Example sources
# Each entry is one mapping in the list; the keys used depend on its `type`.
sources:
  # Web documentation
  - name: example-docs
    type: url
    url: https://docs.example.com
    include:
      - ".*\\.md"
      - ".*\\.html"
    exclude:
      - ".*/api/.*"
      - ".*/legacy/.*"
    schedule: 24h

  # OpenAPI specification
  - name: api-spec
    type: openapi
    url: https://api.example.com/openapi.json
    schedule: 168h  # Weekly

  # GitHub repository
  - name: github-repo
    type: github
    repo: org/repository
    branch: main
    include:
      - "docs/.*"
      - "README.md"
    exclude:
      - "docs/internal/.*"
    # auth_token: ${GITHUB_TOKEN}  # Optional for private repos

  # Local files
  - name: local-docs
    type: local
    path: ./docs
    include:
      - ".*\\.md"
      - ".*\\.txt"
Reference in New Issue
Block a user