#!/usr/bin/env bash
# detect-secrets.sh — Scans for accidentally committed secrets
#
# Usage:
#   bash scripts/detect-secrets.sh --staged-only   # pre-commit: check staged diff only
#   bash scripts/detect-secrets.sh --full-scan      # CI: scan entire working tree

# Strict mode: abort on a failing command (errexit), on expansion of an unset
# variable (nounset), and when any stage of a pipeline fails (pipefail).
set -o errexit -o nounset -o pipefail

# Running count of findings; every check below increments it.
VIOLATIONS=0
# Mode flag taken from the first argument; empty when none was given.
MODE="${1-}"

# Print all arguments, joined into one string, in red followed by a newline.
red() {
  local text="$*"
  printf '\033[0;31m%s\033[0m\n' "$text"
}
# Print all arguments, joined into one string, in green followed by a newline.
green() {
  local text="$*"
  printf '\033[0;32m%s\033[0m\n' "$text"
}

if [[ "$MODE" == "--staged-only" ]]; then

  # ── Check 1: .env itself is not staged (allow deletions — only block additions/modifications) ──
  #######################################
  # Succeed when stdin (one path per line) names a .env or .env.* file other
  # than the allowed templates .env.example, .env.testing and
  # .env.testing.example.
  # Arguments: none (paths are read from stdin)
  # Returns:   0 if at least one forbidden path was seen, non-zero otherwise
  #
  # - The allow-list is anchored to the end of the path, so a name that merely
  #   starts with an allowed one (e.g. .env.testing.local) is still reported.
  # - git wraps paths containing unusual bytes in double quotes
  #   (core.quotePath), hence the optional trailing '"' before each '$'.
  # - The last grep drains its input instead of using -q: quitting at the first
  #   hit can kill the upstream stage with SIGPIPE, which the pipefail option
  #   would turn into a failed (i.e. "nothing found") pipeline.
  #######################################
  has_forbidden_env_path() {
    grep -E '(^|/)\.env(\.|"?$)' \
      | grep -vE '(^|/)\.env\.(example|testing|testing\.example)"?$' >/dev/null
  }

  # Only added/copied/modified entries are inspected, so deleting a .env file
  # is never blocked.
  if git diff --cached --name-only --diff-filter=ACM | has_forbidden_env_path; then
    red "ERROR: .env or .env.* file is staged for commit. Remove it from staging."
    VIOLATIONS=$((VIOLATIONS + 1))
  fi

  # ── Check 2: staged diff contains secret patterns ───────────────────────────
  #######################################
  # Scan unified-diff text for secret patterns on added ('+') lines.
  # Globals:   VIOLATIONS (incremented once per finding)
  # Arguments: $1 - diff text
  # Outputs:   one message per finding, printed through red()
  #
  # grep reads the text from here-strings instead of `echo ... | grep -q`.
  # grep -q exits at the first match; on a large diff the writer is then killed
  # by SIGPIPE and the pipefail option reports the pipeline as failed, so a
  # real match was silently treated as "nothing found".
  # LC_ALL=C makes grep handle the diff as plain bytes, so a line holding bytes
  # that are invalid in the current locale is still matched and printed.
  #######################################
  scan_staged_diff() {
    local diff_text=$1
    local line

    # Laravel app key
    if LC_ALL=C grep -qE '^\+.*APP_KEY=base64:[A-Za-z0-9+/]{40,}={0,2}' <<<"$diff_text"; then
      red "ERROR: APP_KEY with real value detected in staged changes."
      VIOLATIONS=$((VIOLATIONS + 1))
    fi

    # Stripe live/test secret keys
    if LC_ALL=C grep -qE '^\+.*STRIPE_SECRET=sk_(live|test)_[A-Za-z0-9]{20,}' <<<"$diff_text"; then
      red "ERROR: Stripe secret key detected in staged changes."
      VIOLATIONS=$((VIOLATIONS + 1))
    fi

    # AWS access key IDs
    if LC_ALL=C grep -qE '^\+.*AKIA[0-9A-Z]{16}' <<<"$diff_text"; then
      red "ERROR: AWS access key ID pattern detected in staged changes."
      VIOLATIONS=$((VIOLATIONS + 1))
    fi

    # Generic _SECRET= or _KEY= with long real values (not placeholders).
    # Two grep filters select the offending lines in one pass instead of
    # forking echo+grep for every line of the diff.
    while IFS= read -r line; do
      red "ERROR: Possible secret value in staged changes: ${line:0:80}"
      VIOLATIONS=$((VIOLATIONS + 1))
    done < <(
      LC_ALL=C grep -E '^\+.*(_SECRET|_KEY)=[A-Za-z0-9+/]{40,}' <<<"$diff_text" \
        | LC_ALL=C grep -viE '(your_|xxx|example|_here|placeholder|changeme|dummy)'
    )

    # password= with non-empty non-placeholder value.
    # Excludes JSX boolean props (canResetPassword={true}), test helpers, and
    # quoted HTML/JSX attributes (password="..." / password='...').
    # [^$[:space:]] is used because "\s" inside a bracket expression means the
    # two literal characters '\' and 's', which hid every value containing an
    # 's' in its first 8 characters. The quote rule is a separate -e pattern
    # because grep has no "\x27" escape: inside brackets it exempted values
    # starting with 'x', '2' or '7' instead of a single quote.
    while IFS= read -r line; do
      red "ERROR: Possible password value in staged changes: ${line:0:80}"
      VIOLATIONS=$((VIOLATIONS + 1))
    done < <(
      LC_ALL=C grep -iE '^\+.*password=[^$[:space:]]{8,}' <<<"$diff_text" \
        | LC_ALL=C grep -viE \
            -e '(your_|xxx|example|_here|placeholder|changeme|dummy|\$\{|\$\(|=\{true\}|=\{false\}|confirmPassword|resetPassword|canReset|showPassword|togglePassword|hasPassword|withPassword|passwordField|passwordInput|password_confirmation|\.test\.|describe\(|it\()' \
            -e "Password[[:space:]]*=[[:space:]]*[\"']"
    )
  }

  # git errors are silenced and treated as an empty diff (best-effort).
  STAGED_DIFF=$(git diff --cached --unified=0 2>/dev/null || true)

  if [[ -n "$STAGED_DIFF" ]]; then
    scan_staged_diff "$STAGED_DIFF"
  fi

elif [[ "$MODE" == "--full-scan" ]]; then

  # ── Full working tree scan (CI) ──────────────────────────────────────────────
  # Exclude: .env.example, .env.testing, *.test files, this script, vendor/, node_modules/, .git/, .worktrees/
  SCAN_DIRS="."

  #######################################
  # List the regular files to scan, one path per line.
  # Globals:   SCAN_DIRS (read) - root directory handed to find
  # Outputs:   file paths on stdout
  #
  # Directories named vendor, node_modules, .git and .worktrees are pruned, so
  # find never descends into them (the resulting file list is the same as
  # filtering their contents by path, without walking those trees).
  # Files named *.test, .env.example, .env.testing and detect-secrets.sh are
  # skipped. find's own error messages are discarded.
  #######################################
  find_files() {
    find "$SCAN_DIRS" \
      -type d \( -name vendor -o -name node_modules -o -name .git -o -name .worktrees \) -prune \
      -o -type f \
      ! -name "*.test" \
      ! -name ".env.example" \
      ! -name ".env.testing" \
      ! -name "detect-secrets.sh" \
      -print \
      2>/dev/null
  }

  # Check .env is not present in the git index
  #######################################
  # Succeed when stdin (one path per line) names a .env or .env.* file other
  # than the allowed templates .env.example, .env.testing and
  # .env.testing.example.
  # Arguments: none (paths are read from stdin)
  # Returns:   0 if at least one forbidden path was seen, non-zero otherwise
  #
  # - The allow-list is anchored to the end of the path, so a name that merely
  #   starts with an allowed one (e.g. .env.testing.local) is still reported.
  # - git wraps paths containing unusual bytes in double quotes
  #   (core.quotePath), hence the optional trailing '"' before each '$'.
  # - The last grep drains its input instead of using -q: quitting at the first
  #   hit can kill the upstream stage with SIGPIPE, which the pipefail option
  #   would turn into a failed (i.e. "nothing found") pipeline.
  #######################################
  has_forbidden_env_path() {
    grep -E '(^|/)\.env(\.|"?$)' \
      | grep -vE '(^|/)\.env\.(example|testing|testing\.example)"?$' >/dev/null
  }

  if git ls-files | has_forbidden_env_path; then
    red "ERROR: .env or .env.* is tracked by git. Run: git rm --cached <file>"
    VIOLATIONS=$((VIOLATIONS + 1))
  fi

  #######################################
  # Check one file against every full-scan secret pattern.
  # Globals:   VIOLATIONS (incremented once per finding)
  # Arguments: $1 - path of the file to inspect
  # Outputs:   one message per finding, printed through red()
  # grep errors (e.g. unreadable file) are silenced and count as "no match".
  #######################################
  scan_file() {
    local file=$1
    local line

    # APP_KEY with real base64 value
    if grep -qE 'APP_KEY=base64:[A-Za-z0-9+/]{40,}={0,2}' "$file" 2>/dev/null; then
      red "ERROR: APP_KEY with real value in $file"
      VIOLATIONS=$((VIOLATIONS + 1))
    fi

    # Stripe live keys only (test keys may appear in docs/examples)
    if grep -qE 'STRIPE_SECRET=sk_live_[A-Za-z0-9]{20,}' "$file" 2>/dev/null; then
      red "ERROR: Stripe LIVE secret key in $file"
      VIOLATIONS=$((VIOLATIONS + 1))
    fi

    # AWS access key IDs. Files under ./tests/ are skipped (they may contain
    # example keys for sanitizer tests), as are lines containing 'EXAMPLE'
    # (canonical AWS docs placeholder key).
    # The glob compares the leading "./" literally. The second grep drains its
    # input rather than using -q, so the first grep cannot be killed by
    # SIGPIPE and make the pipeline fail under the pipefail option.
    if [[ "$file" != ./tests/* ]] \
        && grep -E 'AKIA[0-9A-Z]{16}' "$file" 2>/dev/null | grep -vE 'EXAMPLE' >/dev/null; then
      red "ERROR: AWS access key ID pattern in $file"
      VIOLATIONS=$((VIOLATIONS + 1))
    fi

    # Generic _SECRET= or _KEY= with long real values (mirrors staged-only check).
    # Candidate lines are pre-selected by one grep; each is then tested via a
    # here-string, which avoids an echo pipeline per line.
    while IFS= read -r line; do
      if grep -qE '(_SECRET|_KEY)=[A-Za-z0-9+/]{40,}' <<<"$line" \
          && ! grep -qiE '(your_|xxx|example|_here|placeholder|changeme|dummy)' <<<"$line"; then
        red "ERROR: Possible secret value in $file: ${line:0:80}"
        VIOLATIONS=$((VIOLATIONS + 1))
      fi
    done < <(grep -E '(_SECRET|_KEY)=' "$file" 2>/dev/null || true)
  }

  while IFS= read -r file; do
    # Skip anything that is not (or is no longer) a regular file.
    [[ -f "$file" ]] || continue
    scan_file "$file"
  done < <(find_files)

else
  # Unknown or missing mode: print the usage line and stop with status 2.
  printf '%s\n' "Usage: $0 --staged-only | --full-scan"
  exit 2
fi

# Final verdict: exit 0 with a green message when nothing was recorded,
# otherwise print the violation count in red and exit 1.
if [[ "$VIOLATIONS" -le 0 ]]; then
  green "detect-secrets: No secrets detected."
  exit 0
fi

red ""
red "detect-secrets: $VIOLATIONS violation(s) found. Commit blocked."
exit 1
