This commit is contained in:
Tomas Dvorak
2026-02-24 10:33:59 +01:00
parent 409acd2e08
commit 898a3c303f
1374 changed files with 290409 additions and 29187 deletions
+39 -9
View File
@@ -8,8 +8,9 @@ storage:
docs_dir: ./devour_data/docs
index_dir: ./devour_data/index
metadata_dir: ./devour_data/metadata
cache_dir: ./devour_data/cache
# Embedding settings
# Embedding settings (optional for lexical search; required for future embedding flows)
embeddings:
provider: openai # openai, mock
model: text-embedding-3-small
@@ -17,7 +18,7 @@ embeddings:
api_key: ${OPENAI_API_KEY} # Use environment variable
batch_size: 100
# Vector database
# Vector database (optional in the current local-first pipeline)
vector_db:
type: memory # memory, chromem
persist: true
@@ -28,7 +29,7 @@ scraper:
user_agent: "Devour/1.0 (+https://github.com/yourorg/devour)"
timeout: 30s
retry_count: 3
retry_delay: 5s
retry_delay: 1s
concurrency: 10
rate_limit: 500ms
max_depth: 3
@@ -44,9 +45,22 @@ scheduler:
# Server settings
server:
mode: local # local, remote
transport: stdio # stdio, http
port: 8080
host: localhost
# Local lexical indexing defaults
indexing:
enabled: true
auto_reindex: true
snippet_length: 220
max_docs: 10000
# Verification defaults
verification:
enabled: true
timeout: 90s
# Example sources
sources:
# Web documentation
@@ -67,17 +81,14 @@ sources:
url: https://api.example.com/openapi.json
schedule: 168h # Weekly
# GitHub repository
# GitHub repository docs
- name: github-repo
type: github
repo: org/repository
branch: main
include:
- "docs/.*"
- "README.md"
exclude:
- "docs/internal/.*"
# auth_token: ${GITHUB_TOKEN} # Optional for private repos
- "(?i)(^|/)README\\.md$"
- "(?i)(^|/)docs?/"
# Local files
- name: local-docs
@@ -86,3 +97,22 @@ sources:
include:
- ".*\\.md"
- ".*\\.txt"
# Self-hosted search API (e.g. SearxNG) with no API key
- name: local-searxng-go
type: localsearch
url: http://127.0.0.1:8080/search
query: golang http client
result_limit: 8
domains:
- pkg.go.dev
- go.dev
# New framework examples
- name: nextjs-docs
type: url
url: https://nextjs.org/docs
- name: express-docs
type: url
url: https://expressjs.com/en/guide/routing.html