# secure-web/backend/scanner/zap_scanner.py

"""
OWASP ZAP Security Scanner Integration.

This module integrates with OWASP ZAP (Zed Attack Proxy) to perform
security vulnerability scanning.
"""

import time
import logging
from typing import Dict, List, Optional

import httpx

from django.conf import settings

from .base import BaseScanner, ScannerResult, ScannerStatus

# Module-level logger on the shared 'scanner' channel; ZAPScanner.run uses it
# to record unexpected scan failures together with their traceback.
logger = logging.getLogger('scanner')


class ZAPScanner(BaseScanner):
    """
    Security scanner using OWASP ZAP.

    Performs:
    - Spider crawling
    - Passive scanning
    - Active scanning (optional)
    - Security vulnerability detection

    Recognised ``config`` keys (all optional):
    - ``zap_url``: base URL of the ZAP API
    - ``api_key``: ZAP API key
    - ``timeout``: per-request HTTP timeout in seconds
    - ``active_scan``: run the intrusive active scan as well (default False)
    - ``spider_max_wait`` / ``passive_max_wait`` / ``active_max_wait``:
      how long, in seconds, to wait for each phase before moving on
    """

    name = "owasp_zap"

    # ZAP risk levels mapped to our severity. Keys are lower-case; lookups
    # normalise the incoming value first. Both the numeric risk codes and the
    # risk names are accepted, because ZAP's JSON alerts view is documented as
    # reporting the name (e.g. "Medium") rather than the code.
    RISK_MAPPING = {
        '0': 'info',      # Informational
        '1': 'low',       # Low
        '2': 'medium',    # Medium
        '3': 'high',      # High
        'informational': 'info',
        'low': 'low',
        'medium': 'medium',
        'high': 'high',
    }

    # Points removed from the 100-point security score per issue severity.
    SEVERITY_DEDUCTIONS = {
        'critical': 25,
        'high': 15,
        'medium': 8,
        'low': 3,
        'info': 1,
    }

    def __init__(self, config: Optional[dict] = None):
        """
        Read ZAP connection settings from ``config``, falling back to
        ``settings.SCANNER_CONFIG`` and then to built-in defaults.
        """
        super().__init__(config)
        # Strip a trailing slash so endpoint paths (which start with "/")
        # never produce a double slash in the request URL.
        self.zap_url = self.config.get(
            'zap_url',
            settings.SCANNER_CONFIG.get('ZAP_HOST', 'http://zap:8080')
        ).rstrip('/')
        self.api_key = self.config.get(
            'api_key',
            settings.SCANNER_CONFIG.get('ZAP_API_KEY', '')
        )
        self.timeout = self.config.get(
            'timeout',
            settings.SCANNER_CONFIG.get('ZAP_TIMEOUT', 120)
        )
        # Whether to run active scan (slower, more intrusive)
        self.active_scan = self.config.get('active_scan', False)
        # Upper bounds (seconds) on how long each phase is waited for.
        self.spider_max_wait = self.config.get('spider_max_wait', 60)
        self.passive_max_wait = self.config.get('passive_max_wait', 30)
        self.active_max_wait = self.config.get('active_max_wait', 120)

    def is_available(self) -> bool:
        """
        Check if ZAP is available.

        Returns:
            True when the ZAP version endpoint answers with HTTP 200,
            False on any other status or on any error reaching it.
        """
        try:
            response = httpx.get(
                f"{self.zap_url}/JSON/core/view/version/",
                params={'apikey': self.api_key},
                timeout=10
            )
            return response.status_code == 200
        except Exception as e:
            # Availability probe: any failure simply means "not available".
            self.logger.warning("ZAP not available: %s", e)
            return False

    def run(self, url: str) -> ScannerResult:
        """
        Run ZAP security scan on the given URL.

        Args:
            url: The URL to scan

        Returns:
            ScannerResult with security findings. Failures (ZAP unreachable,
            timeout, or any other error) are reported as a FAILED result
            rather than raised.
        """
        start_time = time.time()

        if not self.is_available():
            return ScannerResult(
                status=ScannerStatus.FAILED,
                scanner_name=self.name,
                error_message="OWASP ZAP is not available. Check ZAP service configuration.",
                execution_time_seconds=time.time() - start_time
            )

        try:
            # Access the URL to seed ZAP
            self._access_url(url)

            # Run spider to crawl the site
            self._run_spider(url)

            # Wait for passive scan to complete
            self._wait_for_passive_scan()

            # Optionally run active scan
            if self.active_scan:
                self._run_active_scan(url)

            # Get alerts
            alerts = self._get_alerts(url)

            # Process alerts into issues
            issues = self._process_alerts(alerts)

            # Calculate security score based on findings
            scores = self._calculate_scores(issues)

            raw_data = {
                'alerts': alerts,
                'alert_count': len(alerts),
                'scan_type': 'active' if self.active_scan else 'passive',
            }

            execution_time = time.time() - start_time

            return ScannerResult(
                status=ScannerStatus.SUCCESS,
                scanner_name=self.name,
                scores=scores,
                issues=issues,
                raw_data=raw_data,
                execution_time_seconds=execution_time
            )

        except httpx.TimeoutException:
            self.logger.warning("ZAP scan timed out for %s", url)
            return ScannerResult(
                status=ScannerStatus.FAILED,
                scanner_name=self.name,
                error_message="ZAP scan timed out",
                execution_time_seconds=time.time() - start_time
            )
        except Exception as e:
            logger.exception("ZAP scan failed for %s", url)
            return ScannerResult(
                status=ScannerStatus.FAILED,
                scanner_name=self.name,
                error_message=f"ZAP scan error: {e}",
                execution_time_seconds=time.time() - start_time
            )

    def _zap_request(self, endpoint: str, params: Optional[dict] = None) -> dict:
        """
        Make a GET request to the ZAP API and return the decoded JSON body.

        Args:
            endpoint: API path starting with "/" (appended to ``zap_url``)
            params: Extra query parameters; the dict is not modified

        Raises:
            httpx.HTTPStatusError: on a 4xx/5xx response
            httpx.TimeoutException: when the request exceeds ``self.timeout``
        """
        # Copy so the API key is never written into the caller's dict.
        query = dict(params or {})
        query['apikey'] = self.api_key

        response = httpx.get(
            f"{self.zap_url}{endpoint}",
            params=query,
            timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()

    def _poll_until(self, is_done, max_wait, interval, label) -> bool:
        """
        Call ``is_done()`` repeatedly until it returns True or time runs out.

        Args:
            is_done: Zero-argument callable returning True once finished
            max_wait: Wall-clock budget in seconds
            interval: Seconds to sleep between checks
            label: Human-readable phase name used in the timeout warning

        Returns:
            True if ``is_done()`` reported completion, False on timeout.
        """
        # A monotonic deadline bounds the real elapsed time, including the
        # time spent inside the status requests themselves.
        deadline = time.monotonic() + max_wait
        while True:
            if is_done():
                return True
            if time.monotonic() >= deadline:
                break
            time.sleep(interval)

        self.logger.warning(
            "%s did not finish within %s seconds; continuing", label, max_wait
        )
        return False

    def _scan_finished(self, status_endpoint: str, scan_id) -> bool:
        """Return True when the given scan's reported progress is >= 100."""
        status = self._zap_request(status_endpoint, {'scanId': scan_id})
        # A missing status is treated as finished, so polling cannot hang on it.
        return int(status.get('status', '100')) >= 100

    def _access_url(self, url: str):
        """Access URL through ZAP to initialize scanning."""
        self.logger.info("Accessing URL through ZAP: %s", url)
        self._zap_request('/JSON/core/action/accessUrl/', {'url': url})
        time.sleep(2)  # Give ZAP time to process

    def _run_spider(self, url: str):
        """Run ZAP spider to crawl the site and wait (bounded) for it."""
        self.logger.info("Starting ZAP spider for: %s", url)

        # Start spider
        result = self._zap_request('/JSON/spider/action/scan/', {
            'url': url,
            'maxChildren': '10',  # Cap the number of child nodes crawled
            'recurse': 'true',
        })

        scan_id = result.get('scan')
        if scan_id in (None, ''):
            self.logger.warning("ZAP returned no spider scan id; not waiting")
            return

        finished = self._poll_until(
            lambda: self._scan_finished('/JSON/spider/view/status/', scan_id),
            max_wait=self.spider_max_wait,
            interval=2,
            label="Spider",
        )
        if finished:
            self.logger.info("Spider completed")

    def _wait_for_passive_scan(self):
        """Wait (bounded) until ZAP's passive scan queue is empty."""
        self.logger.info("Waiting for passive scan...")

        def queue_empty() -> bool:
            result = self._zap_request('/JSON/pscan/view/recordsToScan/')
            return int(result.get('recordsToScan', '0')) == 0

        finished = self._poll_until(
            queue_empty,
            max_wait=self.passive_max_wait,
            interval=2,
            label="Passive scan",
        )
        if finished:
            self.logger.info("Passive scan completed")

    def _run_active_scan(self, url: str):
        """Run active security scan (more intrusive) and wait (bounded)."""
        self.logger.info("Starting active scan for: %s", url)

        result = self._zap_request('/JSON/ascan/action/scan/', {
            'url': url,
            'recurse': 'true',
            'inScopeOnly': 'false',
        })

        scan_id = result.get('scan')
        if scan_id in (None, ''):
            self.logger.warning("ZAP returned no active scan id; not waiting")
            return

        finished = self._poll_until(
            lambda: self._scan_finished('/JSON/ascan/view/status/', scan_id),
            max_wait=self.active_max_wait,
            interval=5,
            label="Active scan",
        )
        if finished:
            self.logger.info("Active scan completed")

    def _get_alerts(self, url: str) -> List[dict]:
        """Get security alerts for the URL (at most the first 100)."""
        result = self._zap_request('/JSON/core/view/alerts/', {
            'baseurl': url,
            'start': '0',
            'count': '100',  # Limit alerts
        })

        return result.get('alerts', [])

    def _severity_for(self, risk) -> str:
        """Map a ZAP risk value (code or name, any case) to our severity."""
        key = '' if risk is None else str(risk).strip().lower()
        return self.RISK_MAPPING.get(key, 'info')

    def _process_alerts(self, alerts: List[dict]) -> List[dict]:
        """
        Convert ZAP alerts to our issue format.

        Missing or null alert fields are tolerated: text fields fall back to
        an empty string and an unknown risk falls back to 'info'.
        """
        issues = []

        for alert in alerts:
            severity = self._severity_for(alert.get('risk'))

            # Determine category based on alert type
            category = self._categorize_alert(alert)

            # Build remediation from ZAP's solution
            remediation = alert.get('solution') or ''
            reference = alert.get('reference')
            if reference:
                remediation += f"\n\nReferences: {reference}"

            title = alert.get('alert') or alert.get('name') or 'Unknown vulnerability'

            issues.append(self._create_issue(
                category=category,
                severity=severity,
                title=title,
                description=alert.get('description') or '',
                affected_url=alert.get('url'),
                remediation=remediation.strip() or None,
                raw_data={
                    'pluginId': alert.get('pluginId'),
                    'cweid': alert.get('cweid'),
                    'wascid': alert.get('wascid'),
                    # Truncate potentially large payloads
                    'evidence': (alert.get('evidence') or '')[:200],
                    'param': alert.get('param'),
                    'attack': (alert.get('attack') or '')[:200],
                    'confidence': alert.get('confidence'),
                }
            ))

        return issues

    def _categorize_alert(self, alert: dict) -> str:
        """
        Categorize ZAP alert into our categories.

        Returns one of 'security', 'headers', 'tls' or 'cors'. Checks run in
        order and the first match wins; anything unmatched is 'security'.
        """
        alert_name = (alert.get('alert') or alert.get('name') or '').lower()
        cwe_id = str(alert.get('cweid') or '')

        # XSS related
        if 'xss' in alert_name or 'cross-site scripting' in alert_name or cwe_id == '79':
            return 'security'

        # SQL Injection
        if ('sql' in alert_name and 'injection' in alert_name) or cwe_id == '89':
            return 'security'

        # Header related
        if any(h in alert_name for h in ('header', 'csp', 'hsts', 'x-frame', 'x-content-type')):
            return 'headers'

        # Cookie related
        if 'cookie' in alert_name:
            return 'security'

        # TLS/SSL related
        if any(t in alert_name for t in ('ssl', 'tls', 'certificate', 'https')):
            return 'tls'

        # CORS related
        if 'cors' in alert_name or 'cross-origin' in alert_name:
            return 'cors'

        # Default to security
        return 'security'

    def _calculate_scores(self, issues: List[dict]) -> dict:
        """
        Calculate security score based on issues found.

        Starts at 100 and subtracts a per-issue penalty taken from
        SEVERITY_DEDUCTIONS (unknown severities cost nothing); the result is
        clamped to the 0-100 range.

        Returns:
            ``{'zap_security': <int score>}``
        """
        penalty = sum(
            self.SEVERITY_DEDUCTIONS.get(issue.get('severity', 'info'), 0)
            for issue in issues
        )

        return {
            'zap_security': max(0, min(100, 100 - penalty))
        }