File size: 4,404 Bytes
7b4f5dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#!/bin/bash
# =============================================================================
# benchmark.sh — Latency + throughput benchmark for CodeSentry
# Runs 10 analyses on the vulnerable fixture and outputs benchmark_results.json
# =============================================================================
set -euo pipefail

# Resolve paths relative to this script so it can be invoked from any CWD.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"

# Inputs and outputs; API endpoint and run count are environment-overridable.
FIXTURE="$PROJECT_ROOT/tests/fixtures/vulnerable_ml_code.py"
RESULTS_FILE="$PROJECT_ROOT/benchmark_results.json"
API_URL="${CODESENTRY_URL:-http://localhost:8000}"  # CodeSentry API base URL
RUNS="${BENCHMARK_RUNS:-10}"                        # benchmark iterations

# Print the run configuration banner.
printf '%s\n' \
  "============================================================" \
  "  CodeSentry Benchmark" \
  "  API: $API_URL" \
  "  Runs: $RUNS" \
  "  Fixture: $FIXTURE" \
  "============================================================"

# Abort early if the fixture to analyze does not exist.
if [[ ! -f "$FIXTURE" ]]; then
  printf '%s\n' "ERROR: Fixture file not found: $FIXTURE"
  exit 1
fi

# JSON-encode the fixture source so it can be embedded in request payloads.
# The path is passed via argv rather than interpolated into the Python
# source, so quotes/backslashes in the path cannot break (or inject into)
# the snippet.
FIXTURE_CODE=$(python3 -c '
import json, sys
with open(sys.argv[1]) as fh:
    print(json.dumps(fh.read()))
' "$FIXTURE")

# Per-run timing samples (milliseconds) plus a cross-run findings counter.
declare -a TOTAL_TIMES=()
declare -a TTFF_TIMES=()
TOTAL_FINDINGS=0

printf '\nRunning %s benchmark iterations...\n\n' "$RUNS"

# Run $RUNS analysis iterations against the streaming endpoint. Per run we
# record:
#   TOTAL_MS        — wall time from request start to the 'complete' event
#   TTFF_MS         — time to the first 'finding' event (0 if none arrived)
#   FINDINGS_IN_RUN — total number of 'finding' events in the SSE stream
for i in $(seq 1 "$RUNS"); do
  SESSION_ID="bench-$(date +%s%N)-$i"
  START_TS=$(date +%s%N)
  FIRST_FINDING_TS=0
  END_TS=0

  # Build the request body; FIXTURE_CODE is already a JSON string literal,
  # so it is spliced in unquoted.
  PAYLOAD=$(python3 -c "
import json
print(json.dumps({
    'source': $FIXTURE_CODE,
    'source_type': 'code',
    'session_id': '$SESSION_ID'
}))
")

  FINDINGS_IN_RUN=0
  # Parse the SSE stream line by line; event lines look like 'data: {...}'.
  while IFS= read -r line; do
    if [[ "$line" == data:* ]]; then
      DATA="${line#data: }"
      # printf (not echo) so payloads starting with '-n'/'-e' are not mangled.
      EVENT=$(printf '%s' "$DATA" \
        | python3 -c "import json,sys; print(json.load(sys.stdin).get('event',''))" \
        2>/dev/null || echo "")
      if [[ "$EVENT" == "finding" ]]; then
        # Count EVERY finding (the previous version only counted the first,
        # capping findings-per-run at 1) and timestamp the first one for TTFF.
        FINDINGS_IN_RUN=$((FINDINGS_IN_RUN + 1))
        if [ "$FIRST_FINDING_TS" -eq 0 ]; then
          FIRST_FINDING_TS=$(date +%s%N)
        fi
      elif [[ "$EVENT" == "complete" ]]; then
        END_TS=$(date +%s%N)
      fi
    fi
  done < <(curl -sf -X POST "$API_URL/api/analyze" \
    -H "Content-Type: application/json" \
    -d "$PAYLOAD" \
    --no-buffer 2>/dev/null || true)

  # If the stream ended without a 'complete' event, fall back to "now" so
  # TOTAL_MS still reflects the observed wall time.
  if [ "$END_TS" -eq 0 ]; then
    END_TS=$(date +%s%N)
  fi

  TOTAL_MS=$(( (END_TS - START_TS) / 1000000 ))
  TTFF_MS=0
  if [ "$FIRST_FINDING_TS" -gt 0 ]; then
    TTFF_MS=$(( (FIRST_FINDING_TS - START_TS) / 1000000 ))
  fi

  TOTAL_TIMES+=("$TOTAL_MS")
  TTFF_TIMES+=("$TTFF_MS")
  TOTAL_FINDINGS=$((TOTAL_FINDINGS + FINDINGS_IN_RUN))

  echo "  Run $i: total=${TOTAL_MS}ms  ttff=${TTFF_MS}ms  findings=$FINDINGS_IN_RUN"
done

# Compute averages using Python
echo ""
echo "Computing results..."

# Join the bash arrays with commas so they interpolate as valid Python list
# literals. The previous "[${ARR[*]}]" form joined with spaces (default IFS),
# which is a Python SyntaxError whenever more than one sample was collected.
TOTAL_LIST=$(IFS=','; printf '%s' "${TOTAL_TIMES[*]:-0}")
TTFF_LIST=$(IFS=','; printf '%s' "${TTFF_TIMES[*]:-0}")

python3 - <<PYEOF
import json, statistics

total_times = [$TOTAL_LIST]
# Runs that never produced a finding recorded a TTFF of 0; exclude them so
# they do not skew the time-to-first-finding statistics.
ttff_times = [t for t in [$TTFF_LIST] if t > 0]

results = {
    "benchmark_config": {
        "runs": $RUNS,
        "fixture": "vulnerable_ml_code.py",
        "api_url": "$API_URL",
    },
    "latency_ms": {
        "total_analysis": {
            "mean": round(statistics.mean(total_times), 1) if total_times else 0,
            "median": round(statistics.median(total_times), 1) if total_times else 0,
            "min": min(total_times) if total_times else 0,
            "max": max(total_times) if total_times else 0,
            # stdev needs at least two samples.
            "stdev": round(statistics.stdev(total_times), 1) if len(total_times) > 1 else 0,
        },
        "time_to_first_finding": {
            "mean": round(statistics.mean(ttff_times), 1) if ttff_times else 0,
            "median": round(statistics.median(ttff_times), 1) if ttff_times else 0,
            "min": min(ttff_times) if ttff_times else 0,
            "max": max(ttff_times) if ttff_times else 0,
        },
    },
    "findings": {
        "total_across_runs": $TOTAL_FINDINGS,
        "avg_per_run": round($TOTAL_FINDINGS / $RUNS, 1),
    },
}

with open("$RESULTS_FILE", "w") as f:
    json.dump(results, f, indent=2)

print(json.dumps(results, indent=2))
PYEOF

# Final summary banner pointing at the results file.
printf '\n%s\n' "============================================================"
printf '%s\n' "  Benchmark complete! Results saved to:"
printf '%s\n' "  $RESULTS_FILE"
printf '%s\n' "============================================================"