mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-04 12:33:04 +00:00
339 lines
9.1 KiB
Go
339 lines
9.1 KiB
Go
package scraper
|
|
|
|
import (
|
|
"context"
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"os"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
|
|
"gopkg.in/yaml.v3"
|
|
)
|
|
|
|
// OpenAPIScraper parses OpenAPI/Swagger specifications.
|
|
type OpenAPIScraper struct {
|
|
config *Config
|
|
client *http.Client
|
|
}
|
|
|
|
// NewOpenAPIScraper creates a new OpenAPI scraper.
|
|
func NewOpenAPIScraper(config *Config) *OpenAPIScraper {
|
|
timeout := 30 * time.Second
|
|
if config != nil && config.Timeout > 0 {
|
|
timeout = config.Timeout
|
|
}
|
|
return &OpenAPIScraper{
|
|
config: config,
|
|
client: &http.Client{Timeout: timeout},
|
|
}
|
|
}
|
|
|
|
// Scrape fetches and parses an OpenAPI specification.
|
|
func (s *OpenAPIScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
|
|
if source == nil {
|
|
return nil, fmt.Errorf("source is required")
|
|
}
|
|
|
|
raw, specURL, err := s.readSpec(ctx, source)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("read openapi spec: %w", err)
|
|
}
|
|
|
|
spec, err := parseOpenAPISpec(raw)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("parse openapi spec %q: %w", specURL, err)
|
|
}
|
|
|
|
docs := make([]*Document, 0)
|
|
mainContent := buildMainSpecContent(spec)
|
|
docs = append(docs, &Document{
|
|
ID: generateDocID(specURL + "#openapi"),
|
|
Source: coalesceSourceName(source.Name, "openapi"),
|
|
Type: "openapi-spec",
|
|
Title: spec.Info.Title,
|
|
Content: mainContent,
|
|
URL: specURL,
|
|
Metadata: map[string]interface{}{
|
|
"openapi": spec.Version,
|
|
"servers": spec.Servers,
|
|
},
|
|
Hash: hashBytes(raw),
|
|
Timestamp: time.Now(),
|
|
})
|
|
|
|
paths := make([]string, 0, len(spec.Paths))
|
|
for path := range spec.Paths {
|
|
paths = append(paths, path)
|
|
}
|
|
sort.Strings(paths)
|
|
|
|
for _, p := range paths {
|
|
opMap := spec.Paths[p]
|
|
methods := make([]string, 0, len(opMap))
|
|
for m := range opMap {
|
|
methods = append(methods, strings.ToUpper(m))
|
|
}
|
|
sort.Strings(methods)
|
|
|
|
for _, method := range methods {
|
|
op := opMap[strings.ToLower(method)]
|
|
if op == nil {
|
|
continue
|
|
}
|
|
title := strings.TrimSpace(op.Summary)
|
|
if title == "" {
|
|
title = fmt.Sprintf("%s %s", method, p)
|
|
}
|
|
content := buildOperationContent(method, p, op)
|
|
docURL := fmt.Sprintf("%s#%s-%s", specURL, strings.ToLower(method), sanitizeFragment(p))
|
|
docs = append(docs, &Document{
|
|
ID: generateDocID(docURL),
|
|
Source: coalesceSourceName(source.Name, "openapi"),
|
|
Type: "openapi-operation",
|
|
Title: title,
|
|
Content: content,
|
|
URL: docURL,
|
|
Metadata: map[string]interface{}{
|
|
"method": method,
|
|
"path": p,
|
|
"operation_id": op.OperationID,
|
|
},
|
|
Hash: hashString(content),
|
|
Timestamp: time.Now(),
|
|
})
|
|
}
|
|
}
|
|
|
|
return docs, nil
|
|
}
|
|
|
|
// DetectChanges checks if the spec has been updated.
|
|
func (s *OpenAPIScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
|
|
if source == nil {
|
|
return false, "", fmt.Errorf("source is required")
|
|
}
|
|
raw, _, err := s.readSpec(ctx, source)
|
|
if err != nil {
|
|
return false, "", err
|
|
}
|
|
hash := hashBytes(raw)
|
|
return hash != lastHash, hash, nil
|
|
}
|
|
|
|
func (s *OpenAPIScraper) readSpec(ctx context.Context, source *Source) ([]byte, string, error) {
|
|
rawPath := strings.TrimSpace(source.URL)
|
|
if rawPath == "" {
|
|
rawPath = strings.TrimSpace(source.Path)
|
|
}
|
|
if rawPath == "" {
|
|
return nil, "", fmt.Errorf("openapi source requires url or path")
|
|
}
|
|
|
|
if strings.HasPrefix(rawPath, "http://") || strings.HasPrefix(rawPath, "https://") {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawPath, nil)
|
|
if err != nil {
|
|
return nil, "", fmt.Errorf("build openapi fetch request: %w", err)
|
|
}
|
|
if s.config != nil && strings.TrimSpace(s.config.UserAgent) != "" {
|
|
req.Header.Set("User-Agent", s.config.UserAgent)
|
|
}
|
|
|
|
resp, err := s.client.Do(req)
|
|
if err != nil {
|
|
return nil, "", fmt.Errorf("fetch openapi spec from %s: %w", rawPath, err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
return nil, "", fmt.Errorf("openapi fetch failed: HTTP %d", resp.StatusCode)
|
|
}
|
|
body, err := io.ReadAll(io.LimitReader(resp.Body, 10<<20))
|
|
if err != nil {
|
|
return nil, "", fmt.Errorf("read openapi response body from %s: %w", rawPath, err)
|
|
}
|
|
return body, rawPath, nil
|
|
}
|
|
|
|
b, err := os.ReadFile(rawPath)
|
|
if err != nil {
|
|
return nil, "", fmt.Errorf("read openapi file %q: %w", rawPath, err)
|
|
}
|
|
return b, "file://" + rawPath, nil
|
|
}
|
|
|
|
type openAPISpec struct {
|
|
Version string `json:"openapi" yaml:"openapi"`
|
|
Swagger string `json:"swagger" yaml:"swagger"`
|
|
Info openAPIInfo `json:"info" yaml:"info"`
|
|
Servers []openAPIServer `json:"servers" yaml:"servers"`
|
|
Paths map[string]pathItems `json:"paths" yaml:"paths"`
|
|
}
|
|
|
|
// openAPIInfo holds the "title", "version" and "description" fields of a
// specification's "info" object.
type openAPIInfo struct {
	Title       string `json:"title" yaml:"title"`
	Version     string `json:"version" yaml:"version"`
	Description string `json:"description" yaml:"description"`
}
|
|
|
|
// openAPIServer is one entry of a specification's "servers" list: its "url"
// and "description" fields.
type openAPIServer struct {
	URL         string `json:"url" yaml:"url"`
	Description string `json:"description" yaml:"description"`
}
|
|
|
|
// pathItems maps an HTTP method name to the operation declared for it under
// one path of the specification.
//
// A Path Item Object may also carry fields that are not operations, such as
// "parameters", "servers", "summary", "description", "$ref" and "x-"
// extensions. Decoding those straight into *openAPIOperation fails (or yields
// bogus operations), so the custom decoders below keep only operation keys
// and store them lower-cased.
type pathItems map[string]*openAPIOperation

// UnmarshalJSON decodes a JSON Path Item Object, keeping only its operations.
func (p *pathItems) UnmarshalJSON(data []byte) error {
	// By convention a JSON null leaves the receiver untouched.
	if string(data) == "null" {
		return nil
	}

	var fields map[string]json.RawMessage
	if err := json.Unmarshal(data, &fields); err != nil {
		return err
	}

	items := make(pathItems, len(fields))
	for key, rawOp := range fields {
		method, ok := pathItemMethod(key)
		if !ok {
			continue
		}
		// When keys differ only by case, the exact lowercase spelling wins so
		// the result does not depend on map iteration order.
		if _, dup := items[method]; dup && key != method {
			continue
		}
		var op *openAPIOperation
		if err := json.Unmarshal(rawOp, &op); err != nil {
			return fmt.Errorf("operation %q: %w", key, err)
		}
		items[method] = op
	}
	*p = items
	return nil
}

// UnmarshalYAML decodes a YAML Path Item Object, keeping only its operations.
func (p *pathItems) UnmarshalYAML(node *yaml.Node) error {
	// Decoding into a map of nodes defers decoding of each value until its key
	// is known to be an operation.
	var fields map[string]yaml.Node
	if err := node.Decode(&fields); err != nil {
		return err
	}

	items := make(pathItems, len(fields))
	for key := range fields {
		method, ok := pathItemMethod(key)
		if !ok {
			continue
		}
		// Same lowercase-wins rule as the JSON decoder.
		if _, dup := items[method]; dup && key != method {
			continue
		}
		valueNode := fields[key]
		var op *openAPIOperation
		if err := valueNode.Decode(&op); err != nil {
			return fmt.Errorf("operation %q: %w", key, err)
		}
		items[method] = op
	}
	*p = items
	return nil
}

// pathItemMethod reports whether key names an HTTP operation of a Path Item
// Object, matching case-insensitively, and returns the lower-cased method.
func pathItemMethod(key string) (string, bool) {
	method := strings.ToLower(key)
	switch method {
	case "get", "put", "post", "delete", "options", "head", "patch", "trace", "query":
		return method, true
	}
	return "", false
}
|
|
|
|
type openAPIOperation struct {
|
|
Summary string `json:"summary" yaml:"summary"`
|
|
Description string `json:"description" yaml:"description"`
|
|
OperationID string `json:"operationId" yaml:"operationId"`
|
|
Parameters []openAPIParameter `json:"parameters" yaml:"parameters"`
|
|
Responses map[string]response `json:"responses" yaml:"responses"`
|
|
RequestBody map[string]any `json:"requestBody" yaml:"requestBody"`
|
|
Tags []string `json:"tags" yaml:"tags"`
|
|
Deprecated bool `json:"deprecated" yaml:"deprecated"`
|
|
Security []map[string][]string `json:"security" yaml:"security"`
|
|
}
|
|
|
|
// openAPIParameter holds the "name", "in", "description" and "required"
// fields of one entry in an operation's "parameters" list.
type openAPIParameter struct {
	Name        string `json:"name" yaml:"name"`
	In          string `json:"in" yaml:"in"`
	Description string `json:"description" yaml:"description"`
	Required    bool   `json:"required" yaml:"required"`
}
|
|
|
|
// response holds the "description" field of one entry in an operation's
// "responses" object.
type response struct {
	Description string `json:"description" yaml:"description"`
}
|
|
|
|
func parseOpenAPISpec(raw []byte) (*openAPISpec, error) {
|
|
var spec openAPISpec
|
|
if err := json.Unmarshal(raw, &spec); err != nil {
|
|
if yamlErr := yaml.Unmarshal(raw, &spec); yamlErr != nil {
|
|
return nil, fmt.Errorf("invalid openapi content (json: %v; yaml: %w)", err, yamlErr)
|
|
}
|
|
}
|
|
|
|
if strings.TrimSpace(spec.Info.Title) == "" {
|
|
spec.Info.Title = "OpenAPI Specification"
|
|
}
|
|
if strings.TrimSpace(spec.Version) == "" {
|
|
spec.Version = spec.Swagger
|
|
}
|
|
if spec.Paths == nil {
|
|
spec.Paths = map[string]pathItems{}
|
|
}
|
|
|
|
return &spec, nil
|
|
}
|
|
|
|
func buildMainSpecContent(spec *openAPISpec) string {
|
|
var b strings.Builder
|
|
fmt.Fprintf(&b, "# %s\n\n", spec.Info.Title)
|
|
if spec.Info.Version != "" {
|
|
fmt.Fprintf(&b, "- API Version: %s\n", spec.Info.Version)
|
|
}
|
|
if spec.Version != "" {
|
|
fmt.Fprintf(&b, "- OpenAPI: %s\n", spec.Version)
|
|
}
|
|
fmt.Fprintf(&b, "- Paths: %d\n", len(spec.Paths))
|
|
if spec.Info.Description != "" {
|
|
fmt.Fprintf(&b, "\n%s\n", strings.TrimSpace(spec.Info.Description))
|
|
}
|
|
if len(spec.Servers) > 0 {
|
|
fmt.Fprintf(&b, "\n## Servers\n")
|
|
for _, s := range spec.Servers {
|
|
fmt.Fprintf(&b, "- %s", s.URL)
|
|
if s.Description != "" {
|
|
fmt.Fprintf(&b, " - %s", s.Description)
|
|
}
|
|
fmt.Fprintln(&b)
|
|
}
|
|
}
|
|
return b.String()
|
|
}
|
|
|
|
func buildOperationContent(method, path string, op *openAPIOperation) string {
|
|
var b strings.Builder
|
|
fmt.Fprintf(&b, "# %s %s\n\n", method, path)
|
|
if op.Summary != "" {
|
|
fmt.Fprintf(&b, "%s\n\n", strings.TrimSpace(op.Summary))
|
|
}
|
|
if op.Description != "" {
|
|
fmt.Fprintf(&b, "%s\n\n", strings.TrimSpace(op.Description))
|
|
}
|
|
if op.OperationID != "" {
|
|
fmt.Fprintf(&b, "- Operation ID: `%s`\n", op.OperationID)
|
|
}
|
|
if len(op.Tags) > 0 {
|
|
fmt.Fprintf(&b, "- Tags: %s\n", strings.Join(op.Tags, ", "))
|
|
}
|
|
if op.Deprecated {
|
|
fmt.Fprintln(&b, "- Deprecated: true")
|
|
}
|
|
if len(op.Parameters) > 0 {
|
|
fmt.Fprintln(&b, "\n## Parameters")
|
|
for _, p := range op.Parameters {
|
|
req := "optional"
|
|
if p.Required {
|
|
req = "required"
|
|
}
|
|
fmt.Fprintf(&b, "- `%s` (%s, %s)", p.Name, p.In, req)
|
|
if p.Description != "" {
|
|
fmt.Fprintf(&b, ": %s", strings.TrimSpace(p.Description))
|
|
}
|
|
fmt.Fprintln(&b)
|
|
}
|
|
}
|
|
if len(op.Responses) > 0 {
|
|
codes := make([]string, 0, len(op.Responses))
|
|
for code := range op.Responses {
|
|
codes = append(codes, code)
|
|
}
|
|
sort.Strings(codes)
|
|
fmt.Fprintln(&b, "\n## Responses")
|
|
for _, code := range codes {
|
|
resp := op.Responses[code]
|
|
fmt.Fprintf(&b, "- `%s`", code)
|
|
if resp.Description != "" {
|
|
fmt.Fprintf(&b, ": %s", strings.TrimSpace(resp.Description))
|
|
}
|
|
fmt.Fprintln(&b)
|
|
}
|
|
}
|
|
return b.String()
|
|
}
|
|
|
|
// sanitizeFragment converts an API path into a URL-fragment slug: the path is
// lower-cased, every "/" becomes "-", braces are removed, and leading and
// trailing "-" are trimmed. A path that reduces to nothing yields "root".
func sanitizeFragment(path string) string {
	// None of the replacements produces a character another one matches, so a
	// single pass gives the same result as applying them one after another.
	cleaner := strings.NewReplacer("/", "-", "{", "", "}", "")
	slug := strings.Trim(cleaner.Replace(strings.ToLower(path)), "-")
	if slug != "" {
		return slug
	}
	return "root"
}
|
|
|
|
// hashBytes returns the SHA-256 digest of b as a lowercase hex string.
func hashBytes(b []byte) string {
	hasher := sha256.New()
	// hash.Hash documents that Write never returns an error.
	_, _ = hasher.Write(b)
	return hex.EncodeToString(hasher.Sum(nil))
}
|
|
|
|
// hashString returns the SHA-256 digest of s as a lowercase hex string.
func hashString(s string) string {
	hasher := sha256.New()
	// hash.Hash documents that Write never returns an error.
	_, _ = io.WriteString(hasher, s)
	return hex.EncodeToString(hasher.Sum(nil))
}
|
|
|
|
// coalesceSourceName returns name unchanged unless it is empty or consists
// only of whitespace, in which case fallback is returned.
func coalesceSourceName(name, fallback string) string {
	if strings.TrimSpace(name) == "" {
		return fallback
	}
	return name
}
|