package analyzers

import (
	"context"
	"fmt"
	"go/ast"
	"go/token"
	"go/types"
	"os"
	"path/filepath"
	"regexp"
	"strings"

	"github.com/yourorg/devour/internal/quality"
	"golang.org/x/tools/go/packages"
)

// DataFlowAnalyzer is a heuristic taint analyzer for Go packages. It marks
// identifiers assigned from known taint sources and reports calls that pass
// such identifiers to dangerous sinks.
//
// The analysis is name based and flow insensitive: taint is tracked per file
// in a map keyed by identifier spelling, without regard to scopes or
// branches.
type DataFlowAnalyzer struct {
	fset       *token.FileSet
	pkgs       []*packages.Package
	findings   []quality.Finding
	taintSrcs  map[string][]TaintSource // keyed by import path (or identifier spelling)
	sanitizeFn map[string]bool          // keyed by call name as rendered by getCallName
}

// TaintSource describes one function, method or field whose value is treated
// as externally controlled.
type TaintSource struct {
	Name        string
	Package     string
	Category    string
	Description string
}

// DataFlowFinding describes a source-to-sink flow. It is not referenced by
// the code in this file.
type DataFlowFinding struct {
	Source      string
	Sink        string
	Path        []string
	Line        int
	File        string
	Severity    quality.Severity
	Description string
}

// NewDataFlowAnalyzer returns an analyzer preloaded with the built-in taint
// sources and sanitizers.
func NewDataFlowAnalyzer() *DataFlowAnalyzer {
	d := &DataFlowAnalyzer{
		findings:  make([]quality.Finding, 0),
		taintSrcs: make(map[string][]TaintSource),
	}
	d.initTaintSources()
	d.initSanitizers()
	return d
}

// initTaintSources registers the built-in taint sources, keyed by the import
// path of the package that declares them.
func (d *DataFlowAnalyzer) initTaintSources() {
	d.taintSrcs["net/http"] = []TaintSource{
		{Name: "FormValue", Package: "net/http", Category: "http-input", Description: "HTTP form value - user controlled"},
		{Name: "PostFormValue", Package: "net/http", Category: "http-input", Description: "HTTP POST form value - user controlled"},
		{Name: "FormFile", Package: "net/http", Category: "http-input", Description: "HTTP uploaded file - user controlled"},
		{Name: "Cookie", Package: "net/http", Category: "http-input", Description: "HTTP cookie - user controlled"},
		{Name: "Header", Package: "net/http", Category: "http-input", Description: "HTTP header - user controlled"},
		{Name: "URL", Package: "net/http", Category: "http-input", Description: "Request URL - user controlled"},
		{Name: "Body", Package: "net/http", Category: "http-input", Description: "Request body - user controlled"},
	}
	d.taintSrcs["os"] = []TaintSource{
		{Name: "Getenv", Package: "os", Category: "env", Description: "Environment variable - environment controlled"},
		{Name: "Args", Package: "os", Category: "cli", Description: "Command line arguments - user controlled"},
		{Name: "Stdin", Package: "os", Category: "io", Description: "Standard input - user controlled"},
	}
	d.taintSrcs["bufio"] = []TaintSource{
		{Name: "ReadString", Package: "bufio", Category: "io", Description: "Reader input - potentially user controlled"},
		{Name: "ReadBytes", Package: "bufio", Category: "io", Description: "Reader input - potentially user controlled"},
		{Name: "ReadLine", Package: "bufio", Category: "io", Description: "Reader input - potentially user controlled"},
	}
	d.taintSrcs["io"] = []TaintSource{
		{Name: "ReadAll", Package: "io", Category: "io", Description: "Read all from reader - potentially user controlled"},
	}
}

// initSanitizers registers the call names whose result is considered clean.
func (d *DataFlowAnalyzer) initSanitizers() {
	d.sanitizeFn = map[string]bool{
		"html.EscapeString":   true,
		"template.HTMLEscape": true,
		"template.JSEscape":   true,
		"url.QueryEscape":     true,
		"url.PathEscape":      true,
		"sql.Named":           true,
		"regexp.QuoteMeta":    true,
		"strconv.Quote":       true,
	}
}

// Name returns the analyzer identifier.
func (d *DataFlowAnalyzer) Name() string { return "dataflow" }

// Severity returns the analyzer's default severity.
func (d *DataFlowAnalyzer) Severity() quality.Severity { return quality.SeverityT3 }

// Detect loads the packages matching "./..." under path and returns the
// taint findings for them. config is not consulted.
//
// Findings from a previous call are discarded, so the analyzer can be reused.
// ctx is forwarded to the package loader and checked between packages.
func (d *DataFlowAnalyzer) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	// Supplying the FileSet ourselves keeps position lookups valid even when
	// the load yields no packages (the original indexed pkgs[0] and panicked).
	fset := token.NewFileSet()
	cfg := &packages.Config{
		Mode:    packages.NeedName | packages.NeedTypes | packages.NeedTypesInfo | packages.NeedFiles | packages.NeedSyntax,
		Context: ctx,
		Dir:     path,
		Fset:    fset,
	}

	pkgs, err := packages.Load(cfg, "./...")
	if err != nil {
		return nil, fmt.Errorf("failed to load packages: %w", err)
	}

	d.pkgs = pkgs
	d.fset = fset
	// Fresh slice: previously returned results must not be extended or
	// overwritten by this run.
	d.findings = make([]quality.Finding, 0)

	for _, pkg := range pkgs {
		if ctx != nil {
			if err := ctx.Err(); err != nil {
				return nil, err
			}
		}
		d.analyzePackage(pkg)
	}
	return d.findings, nil
}

// analyzePackage runs the file-level analysis on every syntax tree of pkg.
func (d *DataFlowAnalyzer) analyzePackage(pkg *packages.Package) {
	for _, file := range pkg.Syntax {
		d.analyzeFile(pkg, file)
	}
}

// analyzeFile walks file once, in source order, recording tainted
// identifiers and checking calls against them. The taint map is local to the
// file.
func (d *DataFlowAnalyzer) analyzeFile(pkg *packages.Package, file *ast.File) {
	tainted := make(map[string]TaintSource)
	propagations := make(map[string][]string)

	ast.Inspect(file, func(n ast.Node) bool {
		switch node := n.(type) {
		case *ast.AssignStmt:
			d.analyzeAssignment(pkg, node, tainted, propagations)
		case *ast.CallExpr:
			d.analyzeCall(pkg, node, tainted, file)
		case *ast.ValueSpec:
			d.analyzeValueSpec(pkg, node, tainted)
		}
		return true
	})
}

// analyzeAssignment updates tainted for each identifier on the left-hand
// side that has a positionally matching right-hand side:
//
//   - a plain assignment from a registered sanitizer call clears the taint;
//   - an expression rooted at a taint source taints the identifier;
//   - a bare tainted identifier propagates its taint.
//
// propagations is accepted for signature compatibility and is not used.
func (d *DataFlowAnalyzer) analyzeAssignment(pkg *packages.Package, node *ast.AssignStmt, tainted map[string]TaintSource, propagations map[string][]string) {
	// Compound assignments (+=, ...) keep the old value, so they never clean.
	plainAssign := node.Tok == token.ASSIGN || node.Tok == token.DEFINE

	for i, lhs := range node.Lhs {
		ident, ok := lhs.(*ast.Ident)
		if !ok || ident.Name == "_" || i >= len(node.Rhs) {
			continue
		}
		rhs := node.Rhs[i]

		if plainAssign && d.isSanitizerCall(rhs) {
			delete(tainted, ident.Name)
			continue
		}
		if source := d.getTaintSource(pkg, rhs); source != nil {
			tainted[ident.Name] = *source
		}
		if rhsIdent, ok := rhs.(*ast.Ident); ok {
			if t, exists := tainted[rhsIdent.Name]; exists {
				tainted[ident.Name] = t
			}
		}
	}
}

// isSanitizerCall reports whether expr is a call whose rendered name is a
// registered sanitizer.
func (d *DataFlowAnalyzer) isSanitizerCall(expr ast.Expr) bool {
	call, ok := expr.(*ast.CallExpr)
	if !ok {
		return false
	}
	return d.sanitizeFn[d.getCallName(call)]
}

// analyzeCall reports a taint-flow finding for every bare tainted identifier
// passed to a dangerous sink, then runs the SQL, command and path checks on
// each argument. file is not used.
func (d *DataFlowAnalyzer) analyzeCall(pkg *packages.Package, node *ast.CallExpr, tainted map[string]TaintSource, file *ast.File) {
	fnName := d.getCallName(node)

	if d.isDangerousSink(fnName) {
		for _, arg := range node.Args {
			ident, ok := arg.(*ast.Ident)
			if !ok {
				continue
			}
			source, exists := tainted[ident.Name]
			if !exists {
				continue
			}
			pos := d.fset.Position(node.Pos())
			d.findings = append(d.findings, quality.Finding{
				ID:          fmt.Sprintf("taint-flow::%s::%d", pos.Filename, pos.Line),
				Type:        "security",
				Title:       fmt.Sprintf("Tainted data flows to dangerous sink: %s", fnName),
				Description: fmt.Sprintf("User-controlled input from %s flows to %s without sanitization. This may lead to injection vulnerabilities.", source.Description, fnName),
				File:        pos.Filename,
				Line:        pos.Line,
				Severity:    quality.SeverityT4,
				Score:       8,
				Status:      quality.StatusOpen,
				Metadata: map[string]string{
					"source":      source.Name,
					"source_type": source.Category,
					"sink":        fnName,
					"variable":    ident.Name,
				},
			})
		}
	}

	for _, arg := range node.Args {
		d.checkSQLInjection(pkg, arg, tainted, node)
		d.checkCommandInjection(pkg, arg, tainted, node)
		d.checkPathTraversal(pkg, arg, tainted, node)
	}
}

// getTaintSource returns the taint source that expr is rooted at, or nil.
//
// It peels calls, index/slice expressions, parentheses and selectors from the
// outside in, so os.Getenv("X"), os.Args[1], r.FormValue("q"), r.URL.Path and
// r.Header.Get("X") all resolve to their registered source. Call arguments
// are not inspected.
func (d *DataFlowAnalyzer) getTaintSource(pkg *packages.Package, expr ast.Expr) *TaintSource {
	for expr != nil {
		switch e := expr.(type) {
		case *ast.ParenExpr:
			expr = e.X
		case *ast.CallExpr:
			expr = e.Fun
		case *ast.IndexExpr:
			expr = e.X
		case *ast.SliceExpr:
			expr = e.X
		case *ast.SelectorExpr:
			if src := d.selectorTaintSource(pkg, e); src != nil {
				return src
			}
			expr = e.X
		default:
			return nil
		}
	}
	return nil
}

// selectorTaintSource matches a single selector X.Sel against the registered
// sources. X is tried, in order, as an identifier spelling, as an imported
// package name, and as a value whose (pointer to) named type is declared in
// a registered package.
func (d *DataFlowAnalyzer) selectorTaintSource(pkg *packages.Package, sel *ast.SelectorExpr) *TaintSource {
	name := sel.Sel.Name

	if ident, ok := sel.X.(*ast.Ident); ok {
		if src := d.lookupTaintSource(ident.Name, name); src != nil {
			return src
		}
		if pkg != nil && pkg.TypesInfo != nil {
			// Resolves renamed imports (e.g. `stdos "os"`) to their real path.
			if pkgName, ok := pkg.TypesInfo.Uses[ident].(*types.PkgName); ok {
				if src := d.lookupTaintSource(pkgName.Imported().Path(), name); src != nil {
					return src
				}
			}
		}
	}

	if path := d.receiverPackagePath(pkg, sel.X); path != "" {
		return d.lookupTaintSource(path, name)
	}
	return nil
}

// receiverPackagePath returns the import path of the package declaring the
// named type of expr (looking through one pointer), or "" when the type is
// unknown, unnamed or predeclared.
func (d *DataFlowAnalyzer) receiverPackagePath(pkg *packages.Package, expr ast.Expr) string {
	if pkg == nil || pkg.TypesInfo == nil {
		return ""
	}
	t := pkg.TypesInfo.TypeOf(expr)
	if t == nil {
		return ""
	}
	if ptr, ok := t.(*types.Pointer); ok {
		t = ptr.Elem()
	}
	named, ok := t.(*types.Named)
	if !ok || named.Obj() == nil || named.Obj().Pkg() == nil {
		return ""
	}
	return named.Obj().Pkg().Path()
}

// lookupTaintSource returns a copy of the source registered under key with
// the given name, or nil.
func (d *DataFlowAnalyzer) lookupTaintSource(key, name string) *TaintSource {
	for _, src := range d.taintSrcs[key] {
		if src.Name == name {
			found := src
			return &found
		}
	}
	return nil
}

// getCallName renders the callee of node as "x.Sel" when the receiver is a
// plain identifier, as "Sel" for any other selector, as the bare name for an
// identifier, and as "" otherwise.
func (d *DataFlowAnalyzer) getCallName(node *ast.CallExpr) string {
	switch fn := node.Fun.(type) {
	case *ast.SelectorExpr:
		if ident, ok := fn.X.(*ast.Ident); ok {
			return ident.Name + "." + fn.Sel.Name
		}
		return fn.Sel.Name
	case *ast.Ident:
		return fn.Name
	}
	return ""
}

// isDangerousSink reports whether fnName (as rendered by getCallName) is one
// of the built-in dangerous sinks.
func (d *DataFlowAnalyzer) isDangerousSink(fnName string) bool {
	switch fnName {
	case "exec.Command", "exec.CommandContext", "os/exec.Command",
		"db.Exec", "db.Query", "db.QueryRow", "sql.DB.Exec", "sql.DB.Query",
		"os.WriteFile", "os.Create", "os.OpenFile", "ioutil.WriteFile",
		"template.Parse", "html.template.Parse",
		"fmt.Fprintf", "fmt.Printf", "fmt.Sprintf":
		return true
	}
	return false
}

// checkSQLInjection flags a call whose name contains "Exec" or "Query" when
// arg is a string literal containing "%s", "%v" or "+". pkg and tainted are
// not used.
func (d *DataFlowAnalyzer) checkSQLInjection(pkg *packages.Package, arg ast.Expr, tainted map[string]TaintSource, node *ast.CallExpr) {
	fnName := d.getCallName(node)
	if !strings.Contains(fnName, "Exec") && !strings.Contains(fnName, "Query") {
		return
	}

	basic, ok := arg.(*ast.BasicLit)
	// Only string literals can be queries; without this a float argument
	// such as 1e+10 would match the "+" test below.
	if !ok || basic.Kind != token.STRING {
		return
	}

	query := strings.Trim(basic.Value, "`\"")
	if !strings.Contains(query, "%s") && !strings.Contains(query, "%v") && !strings.Contains(query, "+") {
		return
	}

	pos := d.fset.Position(node.Pos())
	d.findings = append(d.findings, quality.Finding{
		ID:          fmt.Sprintf("sql-injection::%s::%d", pos.Filename, pos.Line),
		Type:        "security",
		Title:       "Potential SQL injection vulnerability",
		Description: "SQL query constructed with string formatting. Use parameterized queries instead.",
		File:        pos.Filename,
		Line:        pos.Line,
		Severity:    quality.SeverityT4,
		Score:       10,
		Status:      quality.StatusOpen,
		Metadata: map[string]string{
			"vulnerability": "sql-injection",
			"pattern":       "string-formatting-in-query",
		},
	})
}

// checkCommandInjection flags a call whose name contains "exec.Command" when
// arg is a bare tainted identifier. pkg is not used.
func (d *DataFlowAnalyzer) checkCommandInjection(pkg *packages.Package, arg ast.Expr, tainted map[string]TaintSource, node *ast.CallExpr) {
	fnName := d.getCallName(node)
	if !strings.Contains(fnName, "exec.Command") {
		return
	}

	ident, ok := arg.(*ast.Ident)
	if !ok {
		return
	}
	if _, exists := tainted[ident.Name]; !exists {
		return
	}

	pos := d.fset.Position(node.Pos())
	d.findings = append(d.findings, quality.Finding{
		ID:          fmt.Sprintf("command-injection::%s::%d", pos.Filename, pos.Line),
		Type:        "security",
		Title:       "Potential command injection vulnerability",
		Description: "User-controlled input flows to exec.Command. Sanitize or validate input before use.",
		File:        pos.Filename,
		Line:        pos.Line,
		Severity:    quality.SeverityT4,
		Score:       10,
		Status:      quality.StatusOpen,
		Metadata: map[string]string{
			"vulnerability": "command-injection",
			"variable":      ident.Name,
		},
	})
}

// isPathFunction reports whether fnName (as rendered by getCallName) is one
// of the built-in file path operations.
func (d *DataFlowAnalyzer) isPathFunction(fnName string) bool {
	switch fnName {
	case "os.Open", "os.OpenFile", "os.Create", "os.WriteFile", "os.ReadFile",
		"ioutil.ReadFile", "ioutil.WriteFile",
		"filepath.Join", "filepath.Walk":
		return true
	}
	return false
}

// checkPathTraversal flags a file path operation when arg is a bare tainted
// identifier. pkg is not used.
func (d *DataFlowAnalyzer) checkPathTraversal(pkg *packages.Package, arg ast.Expr, tainted map[string]TaintSource, node *ast.CallExpr) {
	if !d.isPathFunction(d.getCallName(node)) {
		return
	}

	ident, ok := arg.(*ast.Ident)
	if !ok {
		return
	}
	if _, exists := tainted[ident.Name]; !exists {
		return
	}

	pos := d.fset.Position(node.Pos())
	d.findings = append(d.findings, quality.Finding{
		ID:          fmt.Sprintf("path-traversal::%s::%d", pos.Filename, pos.Line),
		Type:        "security",
		Title:       "Potential path traversal vulnerability",
		Description: "User-controlled input used in file path operation. Validate and sanitize paths.",
		File:        pos.Filename,
		Line:        pos.Line,
		Severity:    quality.SeverityT4,
		Score:       8,
		Status:      quality.StatusOpen,
		Metadata: map[string]string{
			"vulnerability": "path-traversal",
			"variable":      ident.Name,
		},
	})
}

// analyzeValueSpec taints every declared name whose positionally matching
// initializer is rooted at a taint source.
func (d *DataFlowAnalyzer) analyzeValueSpec(pkg *packages.Package, node *ast.ValueSpec, tainted map[string]TaintSource) {
	for i, name := range node.Names {
		if i >= len(node.Values) {
			continue
		}
		if source := d.getTaintSource(pkg, node.Values[i]); source != nil {
			tainted[name.Name] = *source
		}
	}
}

// SecretsDetector scans files for text matching known credential patterns.
type SecretsDetector struct {
	patterns []SecretPattern
}

// SecretPattern pairs a compiled regular expression with the name and
// severity reported when it matches.
type SecretPattern struct {
	Name     string
	Pattern  *regexp.Regexp
	Severity quality.Severity
}

// NewSecretsDetector returns a detector loaded with the built-in patterns.
func NewSecretsDetector() *SecretsDetector {
	d := &SecretsDetector{
		patterns: []SecretPattern{
			{Name: "AWS Access Key", Pattern: regexp.MustCompile(`AKIA[0-9A-Z]{16}`), Severity: quality.SeverityT4},
			{Name: "AWS Secret Key", Pattern: regexp.MustCompile(`(?i)aws(.{0,20})?['\"][0-9a-zA-Z/+=]{40}['\"]`), Severity: quality.SeverityT4},
			{Name: "GitHub Token", Pattern: regexp.MustCompile(`ghp_[0-9a-zA-Z]{36}`), Severity: quality.SeverityT4},
			{Name: "GitHub OAuth", Pattern: regexp.MustCompile(`gho_[0-9a-zA-Z]{36}`), Severity: quality.SeverityT4},
			{Name: "GitHub App Token", Pattern: regexp.MustCompile(`(ghu|ghs)_[0-9a-zA-Z]{36}`), Severity: quality.SeverityT4},
			{Name: "Slack Token", Pattern: regexp.MustCompile(`xox[baprs]-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9]{24}`), Severity: quality.SeverityT4},
			{Name: "RSA Private Key", Pattern: regexp.MustCompile(`-----BEGIN RSA PRIVATE KEY-----`), Severity: quality.SeverityT4},
			{Name: "Private Key", Pattern: regexp.MustCompile(`-----BEGIN PRIVATE KEY-----`), Severity: quality.SeverityT4},
			{Name: "JWT", Pattern: regexp.MustCompile(`eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*`), Severity: quality.SeverityT3},
			{Name: "Generic API Key", Pattern: regexp.MustCompile(`(?i)(api_key|apikey|secret|password|token)\s*[=:]\s*['"][^'"]{8,}['"]`), Severity: quality.SeverityT3},
			{Name: "DB Connection String", Pattern: regexp.MustCompile(`(?i)(mysql|postgres|mongodb)://[^:]+:[^@]+@[^/]+`), Severity: quality.SeverityT4},
		},
	}
	return d
}

// Name returns the detector identifier.
func (d *SecretsDetector) Name() string { return "secrets" }

// Severity returns the detector's default severity.
func (d *SecretsDetector) Severity() quality.Severity { return quality.SeverityT4 }

// Detect walks path and returns one finding per pattern match in every file
// with a scanned extension (.go, .ts, .js, .py, .java, .yaml, .yml, .json,
// .env, or none). config is not consulted.
//
// Directories named "vendor" or "node_modules" are not descended into, and
// files ending in "_test.go" are skipped. Entries that cannot be stat'ed or
// read are skipped silently (best effort). The walk stops with ctx's error
// once ctx is done.
func (d *SecretsDetector) Detect(ctx context.Context, path string, config *quality.Config) ([]quality.Finding, error) {
	var findings []quality.Finding

	err := filepath.Walk(path, func(filePath string, info os.FileInfo, err error) error {
		if ctx != nil {
			if ctxErr := ctx.Err(); ctxErr != nil {
				return ctxErr
			}
		}
		if err != nil {
			return nil
		}
		if info.IsDir() {
			// Prune by directory name: works with any path separator and
			// avoids walking large dependency trees only to discard them.
			switch info.Name() {
			case "vendor", "node_modules":
				return filepath.SkipDir
			}
			return nil
		}

		switch filepath.Ext(filePath) {
		case ".go", ".ts", ".js", ".py", ".java", ".yaml", ".yml", ".json", ".env", "":
		default:
			return nil
		}
		if strings.HasSuffix(filePath, "_test.go") {
			return nil
		}

		data, err := os.ReadFile(filePath)
		if err != nil {
			return nil
		}
		content := string(data)

		for _, pattern := range d.patterns {
			// Matches arrive in ascending offset order, so the line number
			// can be advanced incrementally instead of recounted from 0.
			line, counted := 1, 0
			for _, match := range pattern.Pattern.FindAllStringIndex(content, -1) {
				line += strings.Count(content[counted:match[0]], "\n")
				counted = match[0]

				findings = append(findings, quality.Finding{
					ID:          fmt.Sprintf("secret::%s::%d::%s", filePath, line, pattern.Name),
					Type:        "security",
					Title:       fmt.Sprintf("Potential %s detected", pattern.Name),
					Description: fmt.Sprintf("A potential %s was found in source code. Remove it and use environment variables or secret management.", pattern.Name),
					File:        filePath,
					Line:        line,
					Severity:    pattern.Severity,
					Score:       10,
					Status:      quality.StatusOpen,
					Metadata: map[string]string{
						"secret_type": pattern.Name,
					},
				})
			}
		}
		return nil
	})
	if err != nil {
		return nil, err
	}
	return findings, nil
}