mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-04 04:23:02 +00:00
update
This commit is contained in:
+316
-9
@@ -2,30 +2,337 @@ package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// OpenAPIScraper parses OpenAPI/Swagger specifications.
|
||||
type OpenAPIScraper struct {
|
||||
config *Config
|
||||
client *http.Client
|
||||
}
|
||||
|
||||
// NewOpenAPIScraper creates a new OpenAPI scraper.
|
||||
func NewOpenAPIScraper(config *Config) *OpenAPIScraper {
|
||||
return &OpenAPIScraper{config: config}
|
||||
timeout := 30 * time.Second
|
||||
if config != nil && config.Timeout > 0 {
|
||||
timeout = config.Timeout
|
||||
}
|
||||
return &OpenAPIScraper{
|
||||
config: config,
|
||||
client: &http.Client{Timeout: timeout},
|
||||
}
|
||||
}
|
||||
|
||||
// Scrape fetches and parses an OpenAPI specification.
|
||||
func (s *OpenAPIScraper) Scrape(ctx context.Context, source *Source) ([]*Document, error) {
|
||||
// TODO: Implement OpenAPI parsing
|
||||
// 1. Fetch spec from URL
|
||||
// 2. Parse endpoints, schemas, descriptions
|
||||
// 3. Create documents per endpoint
|
||||
// 4. Include authentication, parameters
|
||||
return nil, nil
|
||||
if source == nil {
|
||||
return nil, fmt.Errorf("source is required")
|
||||
}
|
||||
|
||||
raw, specURL, err := s.readSpec(ctx, source)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
spec, err := parseOpenAPISpec(raw)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
docs := make([]*Document, 0)
|
||||
mainContent := buildMainSpecContent(spec)
|
||||
docs = append(docs, &Document{
|
||||
ID: generateDocID(specURL + "#openapi"),
|
||||
Source: coalesceSourceName(source.Name, "openapi"),
|
||||
Type: "openapi-spec",
|
||||
Title: spec.Info.Title,
|
||||
Content: mainContent,
|
||||
URL: specURL,
|
||||
Metadata: map[string]interface{}{
|
||||
"openapi": spec.Version,
|
||||
"servers": spec.Servers,
|
||||
},
|
||||
Hash: hashBytes(raw),
|
||||
Timestamp: time.Now(),
|
||||
})
|
||||
|
||||
paths := make([]string, 0, len(spec.Paths))
|
||||
for path := range spec.Paths {
|
||||
paths = append(paths, path)
|
||||
}
|
||||
sort.Strings(paths)
|
||||
|
||||
for _, p := range paths {
|
||||
opMap := spec.Paths[p]
|
||||
methods := make([]string, 0, len(opMap))
|
||||
for m := range opMap {
|
||||
methods = append(methods, strings.ToUpper(m))
|
||||
}
|
||||
sort.Strings(methods)
|
||||
|
||||
for _, method := range methods {
|
||||
op := opMap[strings.ToLower(method)]
|
||||
if op == nil {
|
||||
continue
|
||||
}
|
||||
title := strings.TrimSpace(op.Summary)
|
||||
if title == "" {
|
||||
title = fmt.Sprintf("%s %s", method, p)
|
||||
}
|
||||
content := buildOperationContent(method, p, op)
|
||||
docURL := fmt.Sprintf("%s#%s-%s", specURL, strings.ToLower(method), sanitizeFragment(p))
|
||||
docs = append(docs, &Document{
|
||||
ID: generateDocID(docURL),
|
||||
Source: coalesceSourceName(source.Name, "openapi"),
|
||||
Type: "openapi-operation",
|
||||
Title: title,
|
||||
Content: content,
|
||||
URL: docURL,
|
||||
Metadata: map[string]interface{}{
|
||||
"method": method,
|
||||
"path": p,
|
||||
"operation_id": op.OperationID,
|
||||
},
|
||||
Hash: hashString(content),
|
||||
Timestamp: time.Now(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return docs, nil
|
||||
}
|
||||
|
||||
// DetectChanges checks if the spec has been updated.
|
||||
func (s *OpenAPIScraper) DetectChanges(ctx context.Context, source *Source, lastHash string) (bool, string, error) {
|
||||
// TODO: Check spec content hash
|
||||
return false, "", nil
|
||||
if source == nil {
|
||||
return false, "", fmt.Errorf("source is required")
|
||||
}
|
||||
raw, _, err := s.readSpec(ctx, source)
|
||||
if err != nil {
|
||||
return false, "", err
|
||||
}
|
||||
hash := hashBytes(raw)
|
||||
return hash != lastHash, hash, nil
|
||||
}
|
||||
|
||||
func (s *OpenAPIScraper) readSpec(ctx context.Context, source *Source) ([]byte, string, error) {
|
||||
rawPath := strings.TrimSpace(source.URL)
|
||||
if rawPath == "" {
|
||||
rawPath = strings.TrimSpace(source.Path)
|
||||
}
|
||||
if rawPath == "" {
|
||||
return nil, "", fmt.Errorf("openapi source requires url or path")
|
||||
}
|
||||
|
||||
if strings.HasPrefix(rawPath, "http://") || strings.HasPrefix(rawPath, "https://") {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawPath, nil)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
if s.config != nil && strings.TrimSpace(s.config.UserAgent) != "" {
|
||||
req.Header.Set("User-Agent", s.config.UserAgent)
|
||||
}
|
||||
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return nil, "", fmt.Errorf("openapi fetch failed: HTTP %d", resp.StatusCode)
|
||||
}
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 10<<20))
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
return body, rawPath, nil
|
||||
}
|
||||
|
||||
b, err := os.ReadFile(rawPath)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
return b, "file://" + rawPath, nil
|
||||
}
|
||||
|
||||
type openAPISpec struct {
|
||||
Version string `json:"openapi" yaml:"openapi"`
|
||||
Swagger string `json:"swagger" yaml:"swagger"`
|
||||
Info openAPIInfo `json:"info" yaml:"info"`
|
||||
Servers []openAPIServer `json:"servers" yaml:"servers"`
|
||||
Paths map[string]pathItems `json:"paths" yaml:"paths"`
|
||||
}
|
||||
|
||||
// openAPIInfo holds the "info" section of a specification: the API title,
// the API's own version string, and its description.
type openAPIInfo struct {
	Title       string `json:"title" yaml:"title"`
	Version     string `json:"version" yaml:"version"`
	Description string `json:"description" yaml:"description"`
}
|
||||
|
||||
// openAPIServer is one entry of the specification's "servers" list: the
// server URL and an optional description.
type openAPIServer struct {
	URL         string `json:"url" yaml:"url"`
	Description string `json:"description" yaml:"description"`
}
|
||||
|
||||
// pathItems maps a lowercase HTTP method name ("get", "post", ...) to the
// operation declared for that method under a single path.
//
// A path item may also carry fields that are not operations, such as
// "parameters", "summary", "servers" or "$ref". Decoding those as operations
// fails, so pathItems decodes itself through pathItemOperations and keeps
// only the HTTP-method entries.
type pathItems map[string]*openAPIOperation

// pathItemOperations lists the operation-bearing fields of a path item.
// Fields of the path item that are not named here are ignored when decoding.
type pathItemOperations struct {
	Get     *openAPIOperation `json:"get" yaml:"get"`
	Put     *openAPIOperation `json:"put" yaml:"put"`
	Post    *openAPIOperation `json:"post" yaml:"post"`
	Delete  *openAPIOperation `json:"delete" yaml:"delete"`
	Options *openAPIOperation `json:"options" yaml:"options"`
	Head    *openAPIOperation `json:"head" yaml:"head"`
	Patch   *openAPIOperation `json:"patch" yaml:"patch"`
	Trace   *openAPIOperation `json:"trace" yaml:"trace"`
}

// toPathItems converts the decoded fields into a pathItems map, omitting
// methods that were not present.
func (o *pathItemOperations) toPathItems() pathItems {
	byMethod := map[string]*openAPIOperation{
		"get":     o.Get,
		"put":     o.Put,
		"post":    o.Post,
		"delete":  o.Delete,
		"options": o.Options,
		"head":    o.Head,
		"patch":   o.Patch,
		"trace":   o.Trace,
	}
	items := make(pathItems, len(byMethod))
	for method, op := range byMethod {
		if op != nil {
			items[method] = op
		}
	}
	return items
}

// UnmarshalJSON decodes a JSON path item, keeping only its HTTP-method
// operations.
func (p *pathItems) UnmarshalJSON(data []byte) error {
	var ops pathItemOperations
	if err := json.Unmarshal(data, &ops); err != nil {
		return err
	}
	*p = ops.toPathItems()
	return nil
}

// UnmarshalYAML decodes a YAML path item, keeping only its HTTP-method
// operations.
//
// NOTE(review): this uses the callback-style signature so the block needs no
// YAML import; gopkg.in/yaml.v3 is expected to honour it — confirm against
// the vendored version.
func (p *pathItems) UnmarshalYAML(unmarshal func(interface{}) error) error {
	var ops pathItemOperations
	if err := unmarshal(&ops); err != nil {
		return err
	}
	*p = ops.toPathItems()
	return nil
}

// openAPIOperation is the subset of an operation (one method on one path)
// that the scraper decodes.
type openAPIOperation struct {
	Summary     string                `json:"summary" yaml:"summary"`
	Description string                `json:"description" yaml:"description"`
	OperationID string                `json:"operationId" yaml:"operationId"`
	Parameters  []openAPIParameter    `json:"parameters" yaml:"parameters"`
	Responses   map[string]response   `json:"responses" yaml:"responses"`
	RequestBody map[string]any        `json:"requestBody" yaml:"requestBody"`
	Tags        []string              `json:"tags" yaml:"tags"`
	Deprecated  bool                  `json:"deprecated" yaml:"deprecated"`
	Security    []map[string][]string `json:"security" yaml:"security"`
}

// openAPIParameter describes one operation parameter: its name, where it is
// carried (the "in" field), its description, and whether it is required.
type openAPIParameter struct {
	Name        string `json:"name" yaml:"name"`
	In          string `json:"in" yaml:"in"`
	Description string `json:"description" yaml:"description"`
	Required    bool   `json:"required" yaml:"required"`
}

// response holds the description of one entry in an operation's "responses"
// map.
type response struct {
	Description string `json:"description" yaml:"description"`
}
|
||||
|
||||
func parseOpenAPISpec(raw []byte) (*openAPISpec, error) {
|
||||
var spec openAPISpec
|
||||
if err := json.Unmarshal(raw, &spec); err != nil {
|
||||
if yamlErr := yaml.Unmarshal(raw, &spec); yamlErr != nil {
|
||||
return nil, fmt.Errorf("invalid openapi content: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if strings.TrimSpace(spec.Info.Title) == "" {
|
||||
spec.Info.Title = "OpenAPI Specification"
|
||||
}
|
||||
if strings.TrimSpace(spec.Version) == "" {
|
||||
spec.Version = spec.Swagger
|
||||
}
|
||||
if spec.Paths == nil {
|
||||
spec.Paths = map[string]pathItems{}
|
||||
}
|
||||
|
||||
return &spec, nil
|
||||
}
|
||||
|
||||
func buildMainSpecContent(spec *openAPISpec) string {
|
||||
var b strings.Builder
|
||||
fmt.Fprintf(&b, "# %s\n\n", spec.Info.Title)
|
||||
if spec.Info.Version != "" {
|
||||
fmt.Fprintf(&b, "- API Version: %s\n", spec.Info.Version)
|
||||
}
|
||||
if spec.Version != "" {
|
||||
fmt.Fprintf(&b, "- OpenAPI: %s\n", spec.Version)
|
||||
}
|
||||
fmt.Fprintf(&b, "- Paths: %d\n", len(spec.Paths))
|
||||
if spec.Info.Description != "" {
|
||||
fmt.Fprintf(&b, "\n%s\n", strings.TrimSpace(spec.Info.Description))
|
||||
}
|
||||
if len(spec.Servers) > 0 {
|
||||
fmt.Fprintf(&b, "\n## Servers\n")
|
||||
for _, s := range spec.Servers {
|
||||
fmt.Fprintf(&b, "- %s", s.URL)
|
||||
if s.Description != "" {
|
||||
fmt.Fprintf(&b, " - %s", s.Description)
|
||||
}
|
||||
fmt.Fprintln(&b)
|
||||
}
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func buildOperationContent(method, path string, op *openAPIOperation) string {
|
||||
var b strings.Builder
|
||||
fmt.Fprintf(&b, "# %s %s\n\n", method, path)
|
||||
if op.Summary != "" {
|
||||
fmt.Fprintf(&b, "%s\n\n", strings.TrimSpace(op.Summary))
|
||||
}
|
||||
if op.Description != "" {
|
||||
fmt.Fprintf(&b, "%s\n\n", strings.TrimSpace(op.Description))
|
||||
}
|
||||
if op.OperationID != "" {
|
||||
fmt.Fprintf(&b, "- Operation ID: `%s`\n", op.OperationID)
|
||||
}
|
||||
if len(op.Tags) > 0 {
|
||||
fmt.Fprintf(&b, "- Tags: %s\n", strings.Join(op.Tags, ", "))
|
||||
}
|
||||
if op.Deprecated {
|
||||
fmt.Fprintln(&b, "- Deprecated: true")
|
||||
}
|
||||
if len(op.Parameters) > 0 {
|
||||
fmt.Fprintln(&b, "\n## Parameters")
|
||||
for _, p := range op.Parameters {
|
||||
req := "optional"
|
||||
if p.Required {
|
||||
req = "required"
|
||||
}
|
||||
fmt.Fprintf(&b, "- `%s` (%s, %s)", p.Name, p.In, req)
|
||||
if p.Description != "" {
|
||||
fmt.Fprintf(&b, ": %s", strings.TrimSpace(p.Description))
|
||||
}
|
||||
fmt.Fprintln(&b)
|
||||
}
|
||||
}
|
||||
if len(op.Responses) > 0 {
|
||||
codes := make([]string, 0, len(op.Responses))
|
||||
for code := range op.Responses {
|
||||
codes = append(codes, code)
|
||||
}
|
||||
sort.Strings(codes)
|
||||
fmt.Fprintln(&b, "\n## Responses")
|
||||
for _, code := range codes {
|
||||
resp := op.Responses[code]
|
||||
fmt.Fprintf(&b, "- `%s`", code)
|
||||
if resp.Description != "" {
|
||||
fmt.Fprintf(&b, ": %s", strings.TrimSpace(resp.Description))
|
||||
}
|
||||
fmt.Fprintln(&b)
|
||||
}
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// fragmentReplacer turns path separators into dashes and drops the braces
// around path parameters.
var fragmentReplacer = strings.NewReplacer("/", "-", "{", "", "}", "")

// sanitizeFragment converts an API path into a URL-fragment-friendly slug:
// lowercased, with "/" replaced by "-", braces removed, and leading and
// trailing dashes trimmed. A path that reduces to nothing becomes "root".
func sanitizeFragment(path string) string {
	slug := strings.Trim(fragmentReplacer.Replace(strings.ToLower(path)), "-")
	if slug == "" {
		return "root"
	}
	return slug
}
|
||||
|
||||
// hashBytes returns the SHA-256 digest of b as a lowercase hex string.
func hashBytes(b []byte) string {
	sum := sha256.Sum256(b)
	return hex.EncodeToString(sum[:])
}
|
||||
|
||||
// hashString returns the SHA-256 digest of s as a lowercase hex string.
func hashString(s string) string {
	sum := sha256.Sum256([]byte(s))
	return hex.EncodeToString(sum[:])
}
|
||||
|
||||
// coalesceSourceName returns name unless it is empty or only whitespace, in
// which case it returns fallback. A non-blank name is returned unmodified,
// including any surrounding whitespace.
func coalesceSourceName(name, fallback string) string {
	if strings.TrimSpace(name) == "" {
		return fallback
	}
	return name
}
|
||||
|
||||
Reference in New Issue
Block a user