Files
Devour/cmd/ask.go
T
2026-02-24 12:10:13 +01:00

1049 lines
27 KiB
Go

package cmd
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/url"
"path"
"regexp"
"sort"
"strings"
"time"
"github.com/spf13/cobra"
appconfig "github.com/yourorg/devour/internal/config"
"github.com/yourorg/devour/internal/scraper"
"github.com/yourorg/devour/internal/search"
"github.com/yourorg/devour/internal/storage"
)
// askCmd defines the `ask` subcommand. It requires at least one positional
// argument (the question; multiple arguments are joined by runAsk) and
// delegates execution to runAsk.
var askCmd = &cobra.Command{
Use: "ask <question>",
Short: "Ask docs directly and get a structured answer",
Long: `Fetch official documentation for a language, rank the most relevant sections,
and return a structured answer with sources.
Examples:
devour ask --lang go "how to regex match digits" --format json
devour ask --lang python "async timeout example"
devour ask --lang rust "tokio spawn best practice" --max-sources 5`,
Args: cobra.MinimumNArgs(1),
RunE: runAsk,
}
// Flag values for the ask command; they are bound to the command's flags in init.
var (
askLanguage string // --lang: language/framework to query (marked required)
askFormat string // --format / -f: output format, "json" or "text"
askMaxSources int // --max-sources: maximum number of source snippets to return
askTimeoutSec int // --timeout: request timeout in seconds
)
// askResponse is the top-level result of the ask command. runAsk either
// JSON-encodes it or hands it to printAskText.
type askResponse struct {
Query string `json:"query"` // the question as joined from the command-line arguments
Language string `json:"language"` // normalized language name
SearchedTerms []string `json:"searched_terms"` // terms derived from the question
Retrieval askRetrieval `json:"retrieval"` // how the documents were obtained
Answer askAnswer `json:"answer"` // synthesized answer
Sources []askSource `json:"sources"` // top-ranked documents backing the answer
Confidence float64 `json:"confidence"` // value produced by computeConfidence
FetchedAt time.Time `json:"fetched_at"` // time the response was assembled
}
// askRetrieval describes where the documents behind an answer came from.
type askRetrieval struct {
Mode string `json:"mode"` // retrieval mode label chosen by runAsk
LocalHits int `json:"local_hits"` // number of documents returned by the local index
FallbackFetched int `json:"fallback_fetch_count"` // number of documents fetched by the live fallback
}
// askAnswer is the synthesized part of an askResponse.
type askAnswer struct {
Summary string `json:"summary"` // snippet or title of the best-ranked document
RecommendedAPI []string `json:"recommended_api"` // API names extracted from titles and content
Example string `json:"example,omitempty"` // code example taken from a fenced block, omitted from JSON when empty
Notes []string `json:"notes"` // informational notes, including persistence warnings
}
// askSource is one ranked document reported back to the user as evidence.
type askSource struct {
DocID string `json:"doc_id"` // document identifier
Title string `json:"title"` // document title
URL string `json:"url"` // document URL
Type string `json:"type"` // document type
SearchTerm string `json:"search_term"` // term that produced the document ("local-index" for local hits)
Relevance float64 `json:"relevance"` // ranking score of the document
Snippet string `json:"snippet"` // excerpt of the content around the first matching query token
}
// rankedDoc pairs a document with its relevance score and the search term
// that produced it.
type rankedDoc struct {
doc *scraper.Document // the document being ranked; helpers such as mergeRankedDocs skip nil entries
score float64 // relevance score; higher sorts first
searchTerm string // originating term, or "local-index" for local hits
}
// askPersistenceWarning records that a persistence step failed. runAsk
// detects it with errors.As and surfaces its message as a note on the answer.
type askPersistenceWarning struct {
operation string // short description of the step that failed
cause error // underlying error
}
// Error formats the warning as "persistence warning: <operation>: <cause>".
func (w *askPersistenceWarning) Error() string {
return fmt.Sprintf("persistence warning: %s: %v", w.operation, w.cause)
}
// Unwrap returns the underlying cause so errors.Is and errors.As can inspect it.
func (w *askPersistenceWarning) Unwrap() error {
return w.cause
}
// init binds the ask command's flags to their package-level variables and
// marks --lang as required.
func init() {
	flags := askCmd.Flags()

	flags.StringVar(&askLanguage, "lang", "", "language/framework (required)")
	flags.StringVarP(&askFormat, "format", "f", "json", "output format (json, text)")
	flags.IntVar(&askMaxSources, "max-sources", 5, "maximum number of source snippets to return")
	flags.IntVar(&askTimeoutSec, "timeout", 60, "request timeout in seconds")

	// The returned error is intentionally discarded, as before.
	_ = askCmd.MarkFlagRequired("lang")
}
// runAsk executes the ask command.
//
// It joins args into a question, normalizes the --lang flag, derives search
// terms, queries the local index and, when shouldFallbackToLive says so,
// fetches live documentation. The merged documents are sorted by descending
// score (ties broken by title), truncated to the requested number of sources
// and rendered as JSON or text.
//
// It returns an error when the question is empty, the language or output
// format is unsupported, no search term can be derived, the app config cannot
// be loaded, or no documents were found (wrapping any retrieval errors).
func runAsk(cmd *cobra.Command, args []string) error {
	question := strings.TrimSpace(strings.Join(args, " "))
	if question == "" {
		return fmt.Errorf("question is required")
	}

	languageIn := strings.ToLower(strings.TrimSpace(askLanguage))
	language, ok := normalizeLanguage(languageIn)
	if !ok {
		return fmt.Errorf("unsupported language: %s. Supported: %s", languageIn, strings.Join(supportedLanguages(), ", "))
	}

	terms := deriveSearchTerms(language, question)
	if len(terms) == 0 {
		return fmt.Errorf("could not derive a search term from the question")
	}

	// Reject an unknown format before doing any retrieval, so a typo in
	// --format does not cost a full fetch (and its persistence side effects).
	format := strings.ToLower(askFormat)
	if format != "json" && format != "text" {
		return fmt.Errorf("unsupported format: %s", askFormat)
	}

	// Work on local copies: the flag variables are package-level state and
	// must not be overwritten by one invocation's clamping.
	maxSources := askMaxSources
	if maxSources <= 0 {
		maxSources = 5
	}
	timeoutSec := askTimeoutSec
	if timeoutSec <= 0 {
		timeoutSec = 60
	}

	cfg, err := loadAppConfig()
	if err != nil {
		return fmt.Errorf("load app config for ask command: %w", err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeoutSec)*time.Second)
	defer cancel()

	var fetchErrors []error
	localRanked, localErr := gatherLocalAskDocs(ctx, cfg, language, question, terms, maxSources*4)
	if localErr != nil {
		// A failing local index is not fatal; fall through to the live fetch.
		localRanked = nil
		fetchErrors = append(fetchErrors, fmt.Errorf("local retrieval failed: %w", localErr))
	}

	ranked := append([]rankedDoc{}, localRanked...)
	retrievalMode := "local-first"
	fallbackCount := 0
	if shouldFallbackToLive(localRanked, terms) {
		fallbackDocs, fetched, errs := fetchAskDocsFromLive(ctx, cfg, language, question, terms)
		fallbackCount = fetched
		fetchErrors = append(fetchErrors, errs...)
		ranked = mergeRankedDocs(ranked, fallbackDocs)
		// Label the result by where its documents actually came from: with no
		// local hits everything is live; otherwise live docs supplement local.
		if len(localRanked) == 0 {
			retrievalMode = "live"
		} else if fetched > 0 {
			retrievalMode = "local+live"
		}
	}

	if len(ranked) == 0 {
		if len(fetchErrors) == 0 {
			return fmt.Errorf("no docs found for %q", language)
		}
		return fmt.Errorf("no docs found for %q: %w", language, errors.Join(fetchErrors...))
	}

	sort.Slice(ranked, func(i, j int) bool {
		if ranked[i].score == ranked[j].score {
			return ranked[i].doc.Title < ranked[j].doc.Title
		}
		return ranked[i].score > ranked[j].score
	})
	if maxSources > len(ranked) {
		maxSources = len(ranked)
	}
	top := ranked[:maxSources]

	response := askResponse{
		Query:         question,
		Language:      language,
		SearchedTerms: terms,
		Retrieval: askRetrieval{
			Mode:            retrievalMode,
			LocalHits:       len(localRanked),
			FallbackFetched: fallbackCount,
		},
		Answer: askAnswer{
			Summary:        summarizeTopDocs(question, top),
			RecommendedAPI: ensureAPISlice(extractRecommendedAPI(top)),
			Example:        extractExample(top),
			Notes: []string{
				"Retrieval is hybrid local-first: local index first, then targeted live fetch fallback.",
				"Relevance uses lexical ranking across local snippets and fetched docs.",
			},
		},
		Sources:    buildAskSources(question, top),
		Confidence: computeConfidence(question, top),
		FetchedAt:  time.Now(),
	}

	// Persistence problems do not fail the command; they are surfaced as notes.
	for _, fetchErr := range fetchErrors {
		var persistenceWarning *askPersistenceWarning
		if errors.As(fetchErr, &persistenceWarning) {
			response.Answer.Notes = append(response.Answer.Notes, persistenceWarning.Error())
		}
	}

	if format == "text" {
		printAskText(response)
		return nil
	}
	enc := json.NewEncoder(cmd.OutOrStdout())
	enc.SetIndent("", " ")
	return enc.Encode(response)
}
// gatherLocalAskDocs searches the local index with a query built from the
// question, the language and the derived terms, asking for up to limit*2
// results. Hits that resultMatchesLanguage rejects are dropped; the rest are
// wrapped as rankedDoc values tagged with the search term "local-index",
// using the hit's snippet as document content and its score as the rank.
// A search error is returned unchanged with a nil slice.
func gatherLocalAskDocs(ctx context.Context, cfg *appconfig.Config, language, question string, terms []string, limit int) ([]rankedDoc, error) {
	engine := search.NewEngine(cfg)

	queryParts := []string{question, language, strings.Join(terms, " ")}
	query := strings.TrimSpace(strings.Join(queryParts, " "))

	hits, _, err := engine.Search(ctx, query, search.SearchOptions{Limit: limit * 2})
	if err != nil {
		return nil, err
	}

	matched := make([]rankedDoc, 0, len(hits))
	for _, hit := range hits {
		if resultMatchesLanguage(hit, language) {
			localDoc := &scraper.Document{
				ID:      hit.DocID,
				Source:  hit.Source,
				Type:    hit.Type,
				Title:   hit.Title,
				Content: hit.Snippet,
				URL:     hit.URL,
			}
			matched = append(matched, rankedDoc{doc: localDoc, score: hit.Score, searchTerm: "local-index"})
		}
	}
	return matched, nil
}
// resultMatchesLanguage reports whether a local search result appears to
// belong to language. Each known language has a list of markers (a docs host
// and a short name/prefix); the result matches when any marker is a substring
// of its lower-cased URL, type, source or title. Languages without an entry
// in the table always match.
func resultMatchesLanguage(result search.Result, language string) bool {
	markersByLanguage := map[string][]string{
		"go":         {"pkg.go.dev", "go-"},
		"rust":       {"docs.rs", "rust-"},
		"python":     {"docs.python.org", "python"},
		"java":       {"docs.oracle.com", "java"},
		"spring":     {"docs.spring.io", "spring"},
		"typescript": {"typescriptlang.org", "ts-"},
		"react":      {"react.dev", "react"},
		"vue":        {"vuejs.org", "vue"},
		"nuxt":       {"nuxt.com", "nuxt"},
		"docker":     {"docs.docker.com", "docker"},
		"cloudflare": {"developers.cloudflare.com", "cloudflare"},
		"astro":      {"docs.astro.build", "astro"},
		"csharp":     {"learn.microsoft.com", "c#"},
		"kotlin":     {"kotlinlang.org", "kotlin"},
		"php":        {"php.net", "php"},
		"ruby":       {"ruby-doc.org", "ruby"},
		"elixir":     {"hexdocs.pm", "elixir"},
		"nextjs":     {"nextjs.org", "next"},
		"svelte":     {"svelte.dev", "svelte"},
		"angular":    {"angular.dev", "angular"},
		"remix":      {"remix.run", "remix"},
		"solid":      {"solidjs.com", "solid"},
		"express":    {"expressjs.com", "express"},
	}

	markers, known := markersByLanguage[language]
	if !known {
		// No filter is defined for this language: accept every result.
		return true
	}

	haystacks := [...]string{
		strings.ToLower(result.URL),
		strings.ToLower(result.Type),
		strings.ToLower(result.Source),
		strings.ToLower(result.Title),
	}
	for _, marker := range markers {
		for _, hay := range haystacks {
			if strings.Contains(hay, marker) {
				return true
			}
		}
	}
	return false
}
// fetchAskDocsFromLive scrapes live documentation for each search term and
// scores the resulting documents against question.
//
// For every term it tries the candidate URLs from candidateDocURLs in order,
// skipping URLs already attempted for an earlier term, and stops at the first
// URL that yields at least one document. Errors for a term are reported only
// when none of its candidates succeeded.
//
// Any fetched documents are then saved via storage.SaveDocuments and, when
// indexing is enabled in cfg, the search index is rebuilt. Failures of those
// two steps are reported as *askPersistenceWarning values and do not discard
// the fetched documents.
//
// It returns the scored documents, the total number of documents fetched and
// the accumulated errors.
func fetchAskDocsFromLive(ctx context.Context, cfg *appconfig.Config, language, question string, terms []string) ([]rankedDoc, int, []error) {
sourceType := scraper.SourceType(mapLanguageToType(language))
if sourceType == "" {
return nil, 0, []error{fmt.Errorf("unsupported language: %s", language)}
}
sc := toScraperConfig(cfg, 2)
// Override the crawl depth on the derived scraper config.
sc.MaxDepth = 1
s := scraper.NewScraper(sourceType, sc)
if s == nil {
return nil, 0, []error{fmt.Errorf("no scraper for %s (%s)", language, sourceType)}
}
var ranked []rankedDoc
var fetchErrors []error
// seenURL is shared across terms so the same URL is never scraped twice.
seenURL := make(map[string]bool)
totalFetched := 0
fetchedDocs := make([]*scraper.Document, 0)
for _, term := range terms {
docURLs, err := candidateDocURLs(language, term)
if err != nil {
fetchErrors = append(fetchErrors, fmt.Errorf("%s: %w", term, err))
continue
}
termFetched := false
termErrors := make([]error, 0, len(docURLs))
for _, docURL := range docURLs {
if seenURL[docURL] {
continue
}
// Mark the URL as attempted before scraping, so a failing URL is not retried for later terms.
seenURL[docURL] = true
source := &scraper.Source{
Name: fmt.Sprintf("%s:%s", language, term),
Type: sourceType,
URL: docURL,
}
applySourceProfile(source)
docs, err := s.Scrape(ctx, source)
if err != nil {
termErrors = append(termErrors, fmt.Errorf("%s: %w", docURL, err))
continue
}
if len(docs) == 0 {
termErrors = append(termErrors, fmt.Errorf("%s: no documents extracted", docURL))
continue
}
termFetched = true
totalFetched += len(docs)
fetchedDocs = append(fetchedDocs, docs...)
for _, doc := range docs {
ranked = append(ranked, rankedDoc{
doc: doc,
score: scoreDocument(question, doc),
searchTerm: term,
})
}
// Stop after first successful candidate for this term.
break
}
// Per-candidate errors are only reported when the whole term came up empty.
if !termFetched && len(termErrors) > 0 {
fetchErrors = append(fetchErrors, errors.Join(termErrors...))
}
}
// Persist fallback docs for future local-first queries.
if len(fetchedDocs) > 0 {
if _, err := storage.SaveDocuments(fetchedDocs, storage.SaveOptions{
Format: "json",
OutputDir: cfg.Storage.DocsDir,
AllowEmpty: true,
}); err != nil {
fetchErrors = append(fetchErrors, &askPersistenceWarning{
operation: "save fallback docs",
cause: err,
})
}
if cfg.Indexing.Enabled {
engine := search.NewEngine(cfg)
// NOTE(review): the rebuild runs on context.Background(), not ctx, so it is
// not bound by the caller's timeout — confirm this is intentional.
if _, err := engine.Rebuild(context.Background()); err != nil {
fetchErrors = append(fetchErrors, &askPersistenceWarning{
operation: "rebuild index after fallback",
cause: err,
})
}
}
}
return ranked, totalFetched, fetchErrors
}
// candidateDocURLs lists the URLs worth trying for term, in priority order:
// the URL built by constructDocURL first, followed by fixed fallback pages
// for the languages that have them. Entries are trimmed; blanks and
// duplicates are dropped. An error from constructDocURL is returned as is.
func candidateDocURLs(language, term string) ([]string, error) {
	primary, err := constructDocURL(language, term)
	if err != nil {
		return nil, err
	}

	var fallbacks []string
	switch language {
	case "nextjs":
		fallbacks = []string{
			"https://nextjs.org/docs/app/building-your-application/routing",
			"https://nextjs.org/docs/app/building-your-application/data-fetching",
			"https://nextjs.org/docs",
		}
	case "svelte":
		fallbacks = []string{
			"https://svelte.dev/docs/kit",
			"https://svelte.dev/docs/svelte/overview",
		}
	case "angular":
		fallbacks = []string{
			"https://angular.dev/guide/http",
			"https://angular.dev/guide/components",
		}
	case "remix":
		fallbacks = []string{
			"https://v2.remix.run/docs/file-conventions/routes",
			"https://v2.remix.run/docs",
		}
	case "solid":
		fallbacks = []string{"https://github.com/solidjs/solid-docs"}
	case "express":
		fallbacks = []string{
			"https://expressjs.com/en/guide/routing.html",
			"https://expressjs.com/en/guide/using-middleware.html",
		}
	}

	candidates := make([]string, 0, 5)
	known := map[string]bool{}
	for _, raw := range append([]string{primary}, fallbacks...) {
		candidate := strings.TrimSpace(raw)
		if candidate == "" || known[candidate] {
			continue
		}
		known[candidate] = true
		candidates = append(candidates, candidate)
	}
	return candidates, nil
}
// mergeRankedDocs concatenates primary and secondary, keeping the first
// occurrence of each document. Identity is the trimmed URL, falling back to
// the trimmed ID, then to "<trimmed title>:<search term>". Entries with a nil
// document are skipped.
func mergeRankedDocs(primary, secondary []rankedDoc) []rankedDoc {
	identity := func(item rankedDoc) string {
		if u := strings.TrimSpace(item.doc.URL); u != "" {
			return u
		}
		if id := strings.TrimSpace(item.doc.ID); id != "" {
			return id
		}
		return strings.TrimSpace(item.doc.Title) + ":" + item.searchTerm
	}

	combined := make([]rankedDoc, 0, len(primary)+len(secondary))
	taken := make(map[string]struct{})
	for _, group := range [][]rankedDoc{primary, secondary} {
		for _, item := range group {
			if item.doc == nil {
				continue
			}
			key := identity(item)
			if _, dup := taken[key]; dup {
				continue
			}
			taken[key] = struct{}{}
			combined = append(combined, item)
		}
	}
	return combined
}
// topLocalMatchesTerms reports whether any of the first five ranked documents
// mentions at least one search term (case-insensitively) in its title, URL
// or content. It is false when either input is empty.
func topLocalMatchesTerms(ranked []rankedDoc, terms []string) bool {
	if len(ranked) == 0 || len(terms) == 0 {
		return false
	}

	needles := make([]string, len(terms))
	for i, term := range terms {
		needles[i] = strings.ToLower(term)
	}

	head := ranked
	if len(head) > 5 {
		head = head[:5]
	}
	for _, candidate := range head {
		d := candidate.doc
		if d == nil {
			continue
		}
		haystack := strings.ToLower(d.Title + " " + d.URL + " " + d.Content)
		for _, needle := range needles {
			if strings.Contains(haystack, needle) {
				return true
			}
		}
	}
	return false
}
// shouldFallbackToLive decides whether the local results are too weak and a
// live fetch should be attempted. localRanked is expected in rank order: only
// its first element's score is inspected.
func shouldFallbackToLive(localRanked []rankedDoc, terms []string) bool {
	switch {
	case len(localRanked) == 0:
		// Nothing local at all.
		return true
	case localRanked[0].score < 0.2:
		// Very low confidence in the best local hit.
		return true
	case !topLocalMatchesTerms(localRanked, terms):
		// None of the leading hits mentions a search term.
		return true
	default:
		// A single, weak match: try live to improve recall.
		return len(localRanked) < 2 && localRanked[0].score < 1.5
	}
}
// deriveSearchTerms turns a question into the list of documentation terms to
// look up for language.
//
// Each language has keyword rules: when the lower-cased question contains one
// of a rule's keywords (substring match), the rule's term is appended. Many
// languages also append a default term unconditionally. Finally the first
// useful token of the question (see primaryQueryToken) is appended, and the
// list is de-duplicated case-insensitively by dedupeTerms, keeping first
// occurrences. Terms therefore come out in rule order, with the query token
// last. Languages without a case contribute only the query token.
func deriveSearchTerms(language, question string) []string {
q := strings.ToLower(question)
var terms []string
// has reports whether the lower-cased question contains any of parts.
has := func(parts ...string) bool {
for _, p := range parts {
if strings.Contains(q, p) {
return true
}
}
return false
}
switch language {
case "go":
if has("regex", "regexp", "regular expression") {
terms = append(terms, "regexp")
}
if has("http", "request", "response", "server", "client") {
terms = append(terms, "net/http")
}
if has("json") {
terms = append(terms, "encoding/json")
}
case "rust":
if has("regex", "regexp") {
terms = append(terms, "regex")
}
if has("async", "await", "task") {
terms = append(terms, "tokio")
}
case "python":
if has("regex", "regexp", "regular expression") {
terms = append(terms, "re")
}
if has("async", "await", "task") {
terms = append(terms, "asyncio")
}
case "java":
if has("regex", "regexp") {
terms = append(terms, "java/util/regex/package-summary")
}
if has("http", "client") {
terms = append(terms, "java/net/http/package-summary")
}
case "spring":
if has("mcp") {
terms = append(terms, "mcp-overview")
}
terms = append(terms, "features")
case "typescript":
if has("regex", "regexp") {
terms = append(terms, "2/template-literal-types")
}
terms = append(terms, "2/basic-types")
case "react":
// The conditional append duplicates the default below; dedupeTerms collapses it.
if has("hook", "state", "effect", "memo") {
terms = append(terms, "hooks")
}
terms = append(terms, "hooks")
case "vue":
// The conditional append duplicates the default below; dedupeTerms collapses it.
if has("reactivity", "ref", "computed", "watch") {
terms = append(terms, "essentials/reactivity-fundamentals")
}
terms = append(terms, "essentials/reactivity-fundamentals")
case "nuxt":
terms = append(terms, "directory-structure")
case "docker":
terms = append(terms, "compose")
case "cloudflare":
terms = append(terms, "workers")
case "astro":
terms = append(terms, "components")
case "csharp":
if has("regex", "regexp", "regular expression") {
terms = append(terms, "regular-expressions")
}
terms = append(terms, "operators")
case "kotlin":
if has("regex", "regexp", "regular expression") {
terms = append(terms, "regex")
}
terms = append(terms, "collections-overview")
case "php":
if has("regex", "regexp", "regular expression") {
terms = append(terms, "function.preg-match.php")
}
terms = append(terms, "pcre")
case "ruby":
if has("regex", "regexp", "regular expression") {
terms = append(terms, "Regexp")
}
terms = append(terms, "String")
case "elixir":
if has("regex", "regexp", "regular expression") {
terms = append(terms, "Regex")
}
terms = append(terms, "String")
case "nextjs":
// The conditional "routing" only affects ordering: it puts "routing" ahead of
// "data-fetching" when the question mentions routing.
if has("routing", "route", "router") {
terms = append(terms, "routing")
}
if has("data", "fetch", "server") {
terms = append(terms, "data-fetching")
}
terms = append(terms, "routing")
case "svelte":
if has("store", "state") {
terms = append(terms, "stores")
}
if has("kit", "routing", "load") {
terms = append(terms, "kit")
}
terms = append(terms, "overview")
case "angular":
if has("http", "client", "request") {
terms = append(terms, "http")
}
if has("routing", "route", "router") {
terms = append(terms, "routing")
}
terms = append(terms, "components")
case "remix":
if has("route", "routing") {
terms = append(terms, "routes")
}
if has("loader", "action", "data") {
terms = append(terms, "loaders")
}
terms = append(terms, "routes")
case "solid":
// Unlike the other languages, the default term is added only when no rule matched.
added := false
if has("router", "route", "routing") {
terms = append(terms, "solid-router")
added = true
}
if has("signal", "state") {
terms = append(terms, "signals")
added = true
}
if has("server", "ssr", "start") {
terms = append(terms, "solid-start")
added = true
}
if !added {
terms = append(terms, "signals")
}
case "express":
if has("middleware", "next", "request") {
terms = append(terms, "middleware")
}
if has("routing", "route", "router") {
terms = append(terms, "routing")
}
terms = append(terms, "middleware")
}
// Use a primary token from the query as a candidate if it looks useful.
if primary := primaryQueryToken(question); primary != "" {
terms = append(terms, primary)
}
return dedupeTerms(terms)
}
// primaryQueryToken returns the first token of query that is at least three
// bytes long, is not a stop word, and is not itself a recognized language
// name. It returns "" when no token qualifies.
func primaryQueryToken(query string) string {
	isStopWord := func(word string) bool {
		switch word {
		case "how", "to", "do", "i", "in",
			"the", "a", "an", "best", "way",
			"for", "with", "and", "using",
			"what", "why", "when", "where", "which",
			"does", "is", "are", "can", "could",
			"should", "would", "need", "help", "please",
			"show", "example", "examples", "docs", "documentation":
			return true
		}
		return false
	}

	for _, candidate := range tokenize(query) {
		if len(candidate) < 3 {
			continue
		}
		if isStopWord(candidate) {
			continue
		}
		if _, isLanguage := normalizeLanguage(candidate); isLanguage {
			continue
		}
		return candidate
	}
	return ""
}
// dedupeTerms trims every term, drops blanks, and removes case-insensitive
// duplicates while preserving the order and spelling of first occurrences.
// The result is always non-nil.
func dedupeTerms(terms []string) []string {
	unique := make([]string, 0, len(terms))
	taken := make(map[string]struct{}, len(terms))
	for _, raw := range terms {
		trimmed := strings.TrimSpace(raw)
		if trimmed == "" {
			continue
		}
		folded := strings.ToLower(trimmed)
		if _, dup := taken[folded]; dup {
			continue
		}
		taken[folded] = struct{}{}
		unique = append(unique, trimmed)
	}
	return unique
}
// scoreDocument computes a lexical relevance score for doc against query.
// Starting from 0.1, every query token adds 3.0 when found in the title, 1.0
// when found in the content and 0.8 when found in the URL path or fragment.
// Document types containing "function" or "method" add 0.3, and types
// containing "section" add 0.2.
func scoreDocument(query string, doc *scraper.Document) float64 {
	words := tokenize(query)
	lowerTitle := strings.ToLower(doc.Title)
	lowerContent := strings.ToLower(doc.Content)
	lowerType := strings.ToLower(doc.Type)

	total := 0.1
	for _, word := range words {
		if strings.Contains(lowerTitle, word) {
			total += 3.0
		}
		if strings.Contains(lowerContent, word) {
			total += 1.0
		}
		if urlContains(doc.URL, word) {
			total += 0.8
		}
	}

	isCallable := strings.Contains(lowerType, "function") || strings.Contains(lowerType, "method")
	if isCallable {
		total += 0.3
	}
	if strings.Contains(lowerType, "section") {
		total += 0.2
	}
	return total
}
// urlContains reports whether token occurs in the lower-cased path or
// fragment of rawURL; host and query string are not considered. When rawURL
// cannot be parsed, the whole lower-cased string is searched instead. token
// is compared as given, so callers pass it lower-cased.
func urlContains(rawURL, token string) bool {
	parsed, err := url.Parse(rawURL)
	if err != nil {
		return strings.Contains(strings.ToLower(rawURL), token)
	}
	for _, part := range [...]string{parsed.Path, parsed.Fragment} {
		if strings.Contains(strings.ToLower(part), token) {
			return true
		}
	}
	return false
}
// summarizeTopDocs produces a one-line summary from the best-ranked document:
// its snippet around the question's tokens when that snippet is at least 30
// bytes long, otherwise its title. It returns "" for an empty docs slice.
func summarizeTopDocs(question string, docs []rankedDoc) string {
	if len(docs) == 0 {
		return ""
	}
	best := docs[0].doc
	if excerpt := extractSnippet(best.Content, tokenize(question)); len(excerpt) >= 30 {
		return excerpt
	}
	return best.Title
}
// ensureAPISlice swaps a nil slice for an empty one so that it is encoded as
// a JSON array ([]) rather than null. Non-nil input is returned unchanged.
func ensureAPISlice(apis []string) []string {
	if apis != nil {
		return apis
	}
	return make([]string, 0)
}
// Patterns and filters used by extractRecommendedAPI. They are built once at
// package initialization instead of on every call.
var (
	// askAPITitleKindPattern matches titles such as "pkg.func Name".
	askAPITitleKindPattern = regexp.MustCompile(`\.(?:func|type|method)\s+([A-Za-z_][A-Za-z0-9_]*)`)
	// askAPITitleDashPattern matches titles such as "Name - description".
	askAPITitleDashPattern = regexp.MustCompile(`^([A-Za-z_][A-Za-z0-9_.]+)\s+-`)
	// askAPICallPattern matches call sites "Name(" or "pkg.Name(" (at most one dot).
	askAPICallPattern = regexp.MustCompile(`\b([A-Za-z_][A-Za-z0-9_]*(?:\.[A-Za-z_][A-Za-z0-9_]*)?)\(`)
	// askAPIDisallowedNames lists lower-cased identifiers never worth recommending.
	askAPIDisallowedNames = map[string]bool{
		"main": true, "len": true, "make": true, "new": true, "append": true,
		"copy": true, "print": true, "println": true, "panic": true,
	}
)

// extractRecommendedAPI collects API names worth recommending from docs.
//
// Names found in document titles come first. If titles alone yield six or
// more names, the first six are returned. Otherwise call expressions found in
// document content are added, up to eight names in total. An unqualified call
// is kept only when it starts with an upper-case letter or underscore; a
// qualified call ("pkg.Name") is kept only when pkg equals the last path
// segment of one of the documents' URLs. Names are unique and in discovery
// order. Documents with a nil doc are skipped. The result is nil when nothing
// is found.
func extractRecommendedAPI(docs []rankedDoc) []string {
	seen := make(map[string]bool)
	var out []string
	record := func(name string) {
		if !seen[name] {
			seen[name] = true
			out = append(out, name)
		}
	}

	pkgHints := make(map[string]bool)
	for _, rd := range docs {
		if rd.doc == nil {
			continue
		}
		if u, err := url.Parse(rd.doc.URL); err == nil {
			base := strings.ToLower(path.Base(strings.Trim(u.Path, "/")))
			if base != "" && base != "." && base != "/" {
				pkgHints[base] = true
			}
		}
		if m := askAPITitleKindPattern.FindStringSubmatch(rd.doc.Title); len(m) > 1 {
			record(m[1])
		}
		if m := askAPITitleDashPattern.FindStringSubmatch(rd.doc.Title); len(m) > 1 {
			record(m[1])
		}
	}

	// Titles are usually higher quality than free-form content call extraction.
	if len(out) >= 6 {
		return out[:6]
	}

	for _, rd := range docs {
		if rd.doc == nil {
			continue
		}
		for _, m := range askAPICallPattern.FindAllStringSubmatch(rd.doc.Content, -1) {
			name := m[1]
			if len(name) < 3 || seen[name] || askAPIDisallowedNames[strings.ToLower(name)] {
				continue
			}
			if pkg, _, qualified := strings.Cut(name, "."); qualified {
				// Keep calls scoped to the fetched package/module docs.
				if !pkgHints[strings.ToLower(pkg)] {
					continue
				}
			} else if first := name[0]; first >= 'a' && first <= 'z' {
				// Skip local/internal-looking identifiers like validID.
				continue
			}
			record(name)
			if len(out) >= 8 {
				return out
			}
		}
	}
	return out
}
// askFencedBlockPattern matches a Markdown fenced code block and captures its
// body. It is compiled once at package initialization.
var askFencedBlockPattern = regexp.MustCompile("(?s)```[a-zA-Z0-9]*\\n(.*?)\\n```")

// extractExample returns a code example taken from the fenced code blocks in
// docs, scanning documents in order.
//
// The first block that looks executable (it spans several lines or contains
// "=" or "{") wins; if there is none, the first non-empty block is used. The
// example is limited to 500 bytes, cut on a UTF-8 character boundary so the
// result never ends in a partial character, and then passed through
// sanitizeSnippet. Documents with a nil doc are skipped. It returns "" when
// no fenced block is found.
func extractExample(docs []rankedDoc) string {
	const maxExampleBytes = 500

	// clip shortens s to at most maxExampleBytes, backing up past UTF-8
	// continuation bytes (10xxxxxx) so no multi-byte character is split.
	clip := func(s string) string {
		if len(s) <= maxExampleBytes {
			return s
		}
		cut := maxExampleBytes
		for cut > 0 && s[cut]&0xC0 == 0x80 {
			cut--
		}
		return s[:cut]
	}

	var fallback string
	for _, rd := range docs {
		if rd.doc == nil {
			continue
		}
		for _, m := range askFencedBlockPattern.FindAllStringSubmatch(rd.doc.Content, -1) {
			example := strings.TrimSpace(m[1])
			if example == "" {
				continue
			}
			if fallback == "" {
				fallback = example
			}
			// Prefer executable-looking snippets over plain signatures.
			if strings.ContainsAny(example, "\n={") {
				return sanitizeSnippet(clip(example))
			}
		}
	}
	if fallback != "" {
		return sanitizeSnippet(clip(fallback))
	}
	return ""
}
// buildAskSources converts ranked documents into the source entries reported
// to the user, in the same order. Each entry carries the document's metadata,
// its score as relevance, and a snippet of its content around the question's
// tokens.
func buildAskSources(question string, docs []rankedDoc) []askSource {
	queryTokens := tokenize(question)
	sources := make([]askSource, len(docs))
	for i := range docs {
		ranked := docs[i]
		d := ranked.doc
		sources[i] = askSource{
			DocID:      d.ID,
			Title:      d.Title,
			URL:        d.URL,
			Type:       d.Type,
			SearchTerm: ranked.searchTerm,
			Relevance:  ranked.score,
			Snippet:    extractSnippet(d.Content, queryTokens),
		}
	}
	return sources
}
// computeConfidence maps the best document's score to a confidence value.
// The score is divided by 4.0 per query token (at least one token is
// assumed), scaled by 0.92 when the runner-up is within 0.5 of the leader,
// and clamped to the range 0.1 to 0.99. It returns 0 for an empty docs slice.
func computeConfidence(question string, docs []rankedDoc) float64 {
	if len(docs) == 0 {
		return 0
	}

	tokens := len(tokenize(question))
	if tokens == 0 {
		tokens = 1
	}
	ceiling := float64(tokens) * 4.0
	if ceiling < 1 {
		ceiling = 1
	}

	leader := docs[0].score
	confidence := leader / ceiling
	if len(docs) > 1 && leader-docs[1].score < 0.5 {
		// A close second place makes the top pick less certain.
		confidence *= 0.92
	}

	switch {
	case confidence < 0.1:
		return 0.1
	case confidence > 0.99:
		return 0.99
	}
	return confidence
}
// askTokenSeparators matches every run of characters that cannot be part of
// a token. It is compiled once at package initialization because tokenize is
// called repeatedly while scoring documents.
var askTokenSeparators = regexp.MustCompile(`[^a-zA-Z0-9_/.-]+`)

// tokenize splits text into lower-cased tokens made of ASCII letters, digits
// and the characters "_", "/", "." and "-". Everything else separates tokens,
// and tokens shorter than two bytes are dropped. The result is non-nil.
func tokenize(text string) []string {
	fields := strings.Fields(strings.ToLower(askTokenSeparators.ReplaceAllString(text, " ")))
	tokens := make([]string, 0, len(fields))
	for _, field := range fields {
		if len(field) < 2 {
			continue
		}
		tokens = append(tokens, field)
	}
	return tokens
}
// extractSnippet returns a short single-line excerpt of content around the
// first of tokens that occurs in it.
//
// Whitespace in content is collapsed and only the first 7000 bytes are
// considered. Tokens are tried in order and matched against an ASCII
// lower-cased copy of the text, so callers pass lower-case tokens (as
// tokenize produces). The excerpt runs from the end of the previous sentence
// (or up to 90 bytes before the match) to the end of the sentence containing
// the match (or up to 180 bytes after it), limited to 260 bytes. When no
// token matches, the first 220 bytes are returned. Every cut is made on a
// UTF-8 character boundary, and the result is passed through sanitizeSnippet.
// It returns "" for empty content.
func extractSnippet(content string, tokens []string) string {
	if content == "" {
		return ""
	}

	// clip shortens s to at most limit bytes, backing up past UTF-8
	// continuation bytes (10xxxxxx) so no multi-byte character is split.
	clip := func(s string, limit int) string {
		if len(s) <= limit {
			return s
		}
		for limit > 0 && s[limit]&0xC0 == 0x80 {
			limit--
		}
		return s[:limit]
	}

	flat := clip(strings.Join(strings.Fields(content), " "), 7000)

	// Fold only ASCII letters. Unlike strings.ToLower this never changes the
	// byte length, so offsets found in the folded copy are valid in flat.
	folded := []byte(flat)
	for i, b := range folded {
		if b >= 'A' && b <= 'Z' {
			folded[i] = b + ('a' - 'A')
		}
	}
	lower := string(folded)

	pos := -1
	for _, tok := range tokens {
		if idx := strings.Index(lower, tok); idx >= 0 {
			pos = idx
			break
		}
	}
	if pos < 0 {
		return sanitizeSnippet(clip(flat, 220))
	}

	// Prefer sentence-ish boundaries around the match for cleaner snippets.
	start := strings.LastIndexAny(flat[:pos], ".!?")
	if start >= 0 {
		start++
	} else {
		start = pos - 90
		if start < 0 {
			start = 0
		}
		// Advance to the next character boundary if we landed mid-character.
		for start < pos && flat[start]&0xC0 == 0x80 {
			start++
		}
	}

	end := len(flat)
	if next := strings.IndexAny(flat[pos:], ".!?"); next >= 0 {
		end = pos + next + 1
	} else if pos+180 < len(flat) {
		end = pos + 180
		// Retreat to the previous character boundary if we landed mid-character.
		for end > pos && flat[end]&0xC0 == 0x80 {
			end--
		}
	}

	return sanitizeSnippet(clip(strings.TrimSpace(flat[start:end]), 260))
}

// sanitizeSnippet strips Markdown code markers from s (triple backticks become
// a space, single backticks are removed), replaces pilcrow signs and
// non-breaking spaces with spaces, and collapses all whitespace runs to single
// spaces with no leading or trailing space.
func sanitizeSnippet(s string) string {
	replacer := strings.NewReplacer("```", " ", "`", "", "¶", " ", "\u00a0", " ")
	s = replacer.Replace(s)
	s = strings.Join(strings.Fields(s), " ")
	return strings.TrimSpace(s)
}
// printAskText writes resp to the root command's output stream in a
// human-readable layout: a header block, the summary, the optional
// recommended-API and example sections, and finally the list of sources.
func printAskText(resp askResponse) {
	out := rootCmd.OutOrStdout()
	retrieval := resp.Retrieval
	answer := resp.Answer

	fmt.Fprintf(out, "Query: %s\n", resp.Query)
	fmt.Fprintf(out, "Language: %s\n", resp.Language)
	fmt.Fprintf(out, "Searched terms: %s\n", strings.Join(resp.SearchedTerms, ", "))
	fmt.Fprintf(out, "Retrieval: %s (local hits: %d, fallback fetched: %d)\n", retrieval.Mode, retrieval.LocalHits, retrieval.FallbackFetched)
	fmt.Fprintf(out, "Confidence: %.2f\n\n", resp.Confidence)

	fmt.Fprintln(out, "Summary:")
	fmt.Fprintln(out, answer.Summary)
	fmt.Fprintln(out)

	if len(answer.RecommendedAPI) > 0 {
		fmt.Fprintln(out, "Recommended API:")
		for _, name := range answer.RecommendedAPI {
			fmt.Fprintf(out, " - %s\n", name)
		}
		fmt.Fprintln(out)
	}

	if answer.Example != "" {
		fmt.Fprintln(out, "Example:")
		fmt.Fprintln(out, answer.Example)
		fmt.Fprintln(out)
	}

	fmt.Fprintln(out, "Sources:")
	for _, source := range resp.Sources {
		fmt.Fprintf(out, "- [%s] %s (%s)\n %s\n", source.Type, source.Title, source.SearchTerm, source.URL)
	}
}