Skip to content

Commit dec52c4

Browse files
ccojocarclaude
andauthored
fix: prevent taint analysis hang on packages with many CHA call graph edges (#1608) (#1610)
Cap incoming call graph edges to 32 per function and add cross-query parameter taint memoization to prevent combinatorial explosion when CHA over-approximates interface method calls across transitive dependencies. Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent a0de8b6 commit dec52c4

File tree

2 files changed

+160
-16
lines changed

2 files changed

+160
-16
lines changed

taint/analyzer_internal_test.go

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package taint
22

33
import (
4+
"fmt"
45
"go/ast"
56
"go/constant"
67
"go/parser"
@@ -9,6 +10,7 @@ import (
910
"os"
1011
"path/filepath"
1112
"testing"
13+
"time"
1214

1315
"golang.org/x/tools/go/analysis"
1416
"golang.org/x/tools/go/analysis/passes/buildssa"
@@ -441,6 +443,109 @@ func f(w W) { w.Write([]byte("hello")) }
441443
_ = analyzer.Analyze(prog, []*ssa.Function{fn})
442444
}
443445

446+
// buildManySinkCallsFixture creates an SSA program with many interface implementations
447+
// and many sink-calling functions, producing a large CHA call graph. Used by both the
448+
// regression test and benchmark.
449+
func buildManySinkCallsFixture(tb testing.TB) (*ssa.Program, []*ssa.Function) {
450+
tb.Helper()
451+
452+
src := `package p
453+
454+
type W interface{ Write([]byte) (int, error) }
455+
`
456+
// Generate 20 concrete implementations of W to inflate CHA edges.
457+
for i := 0; i < 20; i++ {
458+
src += fmt.Sprintf(`
459+
type Impl%d struct{}
460+
func (x *Impl%d) Write(p []byte) (int, error) { return len(p), nil }
461+
`, i, i)
462+
}
463+
464+
// Generate 20 functions, each calling w.Write with a variable arg (potential sink).
465+
for i := 0; i < 20; i++ {
466+
src += fmt.Sprintf(`
467+
func caller%d(w W, data []byte) { w.Write(data) }
468+
`, i)
469+
}
470+
471+
fset := token.NewFileSet()
472+
parsed, err := parser.ParseFile(fset, "p.go", src, 0)
473+
if err != nil {
474+
tb.Fatalf("parse: %v", err)
475+
}
476+
info := &types.Info{
477+
Types: make(map[ast.Expr]types.TypeAndValue),
478+
Defs: make(map[*ast.Ident]types.Object),
479+
Uses: make(map[*ast.Ident]types.Object),
480+
Implicits: make(map[ast.Node]types.Object),
481+
Scopes: make(map[ast.Node]*types.Scope),
482+
Selections: make(map[*ast.SelectorExpr]*types.Selection),
483+
}
484+
pkg, err := (&types.Config{}).Check("p", fset, []*ast.File{parsed}, info)
485+
if err != nil {
486+
tb.Fatalf("type-check: %v", err)
487+
}
488+
prog := ssa.NewProgram(fset, ssa.BuilderMode(0))
489+
ssaPkg := prog.CreatePackage(pkg, []*ast.File{parsed}, info, true)
490+
prog.Build()
491+
492+
// Collect all caller* functions as analysis targets.
493+
var srcFuncs []*ssa.Function
494+
for i := 0; i < 20; i++ {
495+
fn := ssaPkg.Func(fmt.Sprintf("caller%d", i))
496+
if fn == nil {
497+
tb.Fatalf("SSA function caller%d not found", i)
498+
}
499+
srcFuncs = append(srcFuncs, fn)
500+
}
501+
502+
return prog, srcFuncs
503+
}
504+
505+
func TestTaintAnalysisPerformanceWithManySinkCalls(t *testing.T) {
506+
t.Parallel()
507+
508+
// This test verifies that taint analysis completes in bounded time even when
509+
// CHA produces a large call graph (many interface implementations × many sink calls).
510+
// Before the maxCallerEdges cap and paramTaintCache, this scenario could hang.
511+
prog, srcFuncs := buildManySinkCallsFixture(t)
512+
513+
analyzer := New(&Config{
514+
Sinks: []Sink{
515+
{Package: "p", Receiver: "W", Method: "Write", CheckArgs: []int{1}},
516+
},
517+
})
518+
519+
// Must complete within 10 seconds; without the fix this could hang indefinitely.
520+
done := make(chan []Result, 1)
521+
go func() {
522+
done <- analyzer.Analyze(prog, srcFuncs)
523+
}()
524+
525+
select {
526+
case <-time.After(10 * time.Second):
527+
t.Fatal("taint analysis did not complete within 10 seconds — possible hang regression")
528+
case results := <-done:
529+
_ = results
530+
}
531+
}
532+
533+
func BenchmarkTaintAnalysisManySinkCalls(b *testing.B) {
534+
prog, srcFuncs := buildManySinkCallsFixture(b)
535+
536+
cfg := &Config{
537+
Sinks: []Sink{
538+
{Package: "p", Receiver: "W", Method: "Write", CheckArgs: []int{1}},
539+
},
540+
}
541+
542+
b.ResetTimer()
543+
for b.Loop() {
544+
analyzer := New(cfg)
545+
analyzer.Analyze(prog, srcFuncs)
546+
}
547+
}
548+
444549
func TestResolveOriginalTypeMakeInterface(t *testing.T) {
445550
t.Parallel()
446551
// Build a minimal, self-contained SSA program (no external imports) that

taint/taint.go

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ import (
2323
// maxTaintDepth limits recursion depth to prevent stack overflow on large codebases
2424
const maxTaintDepth = 50
2525

26+
// maxCallerEdges caps the number of incoming call graph edges examined per function
27+
// in isParameterTainted. CHA over-approximates call graphs (every interface method
28+
// call fans out to ALL implementations), so a function can have thousands of callers.
29+
// Real taint flows come from direct/nearby callers, not the 33rd+ CHA-generated edge.
30+
const maxCallerEdges = 32
31+
2632
// isContextType checks if a type is context.Context.
2733
// context.Context is a control-flow mechanism (deadlines, cancellation, request-scoped values)
2834
// that does not carry user-controlled data relevant to taint sinks like XSS.
@@ -211,14 +217,21 @@ type Config struct {
211217
}
212218

213219
// Analyzer performs taint analysis on SSA programs.
220+
// paramKey identifies a specific parameter of a function for memoization.
221+
type paramKey struct {
222+
fn *ssa.Function
223+
paramIdx int
224+
}
225+
214226
type Analyzer struct {
215-
config *Config
216-
sources map[string]Source // keyed by full type string
217-
funcSrcs map[string]Source // function sources keyed by "pkg.Func"
218-
sinks map[string]Sink // keyed by full function string
219-
sanitizers map[string]struct{} // keyed by full function string
220-
callGraph *callgraph.Graph
221-
prog *ssa.Program // set at Analyze time for ArgTypeGuards resolution
227+
config *Config
228+
sources map[string]Source // keyed by full type string
229+
funcSrcs map[string]Source // function sources keyed by "pkg.Func"
230+
sinks map[string]Sink // keyed by full function string
231+
sanitizers map[string]struct{} // keyed by full function string
232+
callGraph *callgraph.Graph
233+
prog *ssa.Program // set at Analyze time for ArgTypeGuards resolution
234+
paramTaintCache map[paramKey]bool // caches true results from isParameterTainted
222235
}
223236

224237
// SetCallGraph injects a precomputed call graph.
@@ -309,13 +322,17 @@ func (a *Analyzer) Analyze(prog *ssa.Program, srcFuncs []*ssa.Function) []Result
309322
a.callGraph = cha.CallGraph(prog)
310323
}
311324

325+
a.paramTaintCache = make(map[paramKey]bool)
326+
312327
var results []Result
313328

314329
// Find all sink calls in the program
315330
for _, fn := range srcFuncs {
316331
results = append(results, a.analyzeFunctionSinks(fn)...)
317332
}
318333

334+
a.paramTaintCache = nil
335+
319336
return results
320337
}
321338

@@ -824,11 +841,31 @@ func (a *Analyzer) isParameterTainted(param *ssa.Parameter, fn *ssa.Function, vi
824841
return false
825842
}
826843

844+
// Resolve paramIdx early so we can use it for cache lookups.
845+
paramIdx := -1
846+
for i, p := range fn.Params {
847+
if p == param {
848+
paramIdx = i
849+
break
850+
}
851+
}
852+
853+
// Check memoization cache (only true results are cached).
854+
if paramIdx >= 0 && a.paramTaintCache != nil {
855+
key := paramKey{fn: fn, paramIdx: paramIdx}
856+
if a.paramTaintCache[key] {
857+
return true
858+
}
859+
}
860+
827861
// Check if parameter type is a source type.
828862
// This is the ONLY place where type-based source matching should trigger
829863
// automatic taint — because parameters represent data flowing IN from
830864
// external callers we don't control.
831865
if a.isSourceType(param.Type()) {
866+
if paramIdx >= 0 && a.paramTaintCache != nil {
867+
a.paramTaintCache[paramKey{fn: fn, paramIdx: paramIdx}] = true
868+
}
832869
return true
833870
}
834871

@@ -842,14 +879,6 @@ func (a *Analyzer) isParameterTainted(param *ssa.Parameter, fn *ssa.Function, vi
842879
return false
843880
}
844881

845-
paramIdx := -1
846-
for i, p := range fn.Params {
847-
if p == param {
848-
paramIdx = i
849-
break
850-
}
851-
}
852-
853882
if paramIdx < 0 {
854883
return false
855884
}
@@ -867,8 +896,14 @@ func (a *Analyzer) isParameterTainted(param *ssa.Parameter, fn *ssa.Function, vi
867896
adjustedIdx = paramIdx
868897
}
869898

870-
// Check each caller
899+
// Check each caller, capping at maxCallerEdges to avoid combinatorial
900+
// explosion from CHA over-approximation of interface method calls.
901+
edgesChecked := 0
871902
for _, inEdge := range node.In {
903+
if edgesChecked >= maxCallerEdges {
904+
break
905+
}
906+
872907
site := inEdge.Site
873908
if site == nil {
874909
continue
@@ -877,7 +912,11 @@ func (a *Analyzer) isParameterTainted(param *ssa.Parameter, fn *ssa.Function, vi
877912
callArgs := site.Common().Args
878913

879914
if adjustedIdx < len(callArgs) {
915+
edgesChecked++
880916
if a.isTainted(callArgs[adjustedIdx], inEdge.Caller.Func, visited, depth+1) {
917+
if a.paramTaintCache != nil {
918+
a.paramTaintCache[paramKey{fn: fn, paramIdx: paramIdx}] = true
919+
}
881920
return true
882921
}
883922
}

0 commit comments

Comments
 (0)