# secure-web/backend/scanner/scanners/zap.py

"""
OWASP ZAP Scanner Integration.

This module integrates with OWASP ZAP for security scanning,
detecting vulnerabilities like XSS, injection flaws, and
misconfigurations.
"""

import logging
import time
from typing import Any, Dict, List, Optional

import httpx

from django.conf import settings

from .base import (
    BaseScanner,
    ScannerResult,
    ScannerStatus,
    IssueData,
    MetricData,
)

logger = logging.getLogger(__name__)


class ZAPScanner(BaseScanner):
    """
    Scanner using OWASP ZAP for security vulnerability detection.

    Accesses the target, spiders it, and runs an active scan to
    identify common security issues:
    - XSS vulnerabilities
    - SQL injection patterns
    - Insecure cookies
    - Missing security headers
    - SSL/TLS issues
    - And more...
    """

    name = "owasp_zap"

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Set up the ZAP connection settings for this scanner.

        Each setting is looked up in ``self.config`` first and falls back
        to the matching entry of ``settings.SCANNER_CONFIG``, then to a
        hard-coded default.

        Args:
            config: Optional configuration passed through to the base
                class; the keys read here are 'zap_host', 'api_key'
                and 'timeout'.
        """
        super().__init__(config)
        project_defaults = settings.SCANNER_CONFIG

        # Resolve the project-level fallbacks first, then let the
        # instance configuration override them.
        fallback_host = project_defaults.get('ZAP_HOST', 'http://zap:8080')
        fallback_key = project_defaults.get('ZAP_API_KEY', '')
        fallback_timeout = project_defaults.get('ZAP_TIMEOUT', 120)

        self.zap_host = self.config.get('zap_host', fallback_host)
        self.api_key = self.config.get('api_key', fallback_key)
        self.timeout = self.config.get('timeout', fallback_timeout)

    def is_available(self) -> bool:
        """
        Probe the ZAP version endpoint to see whether the service responds.

        Returns:
            True when the endpoint answers with HTTP 200; False for any
            other status or when the request raises.
        """
        try:
            version_endpoint = f"{self.zap_host}/JSON/core/view/version/"
            query = {'apikey': self.api_key}
            # Short fixed timeout: this is only a liveness probe
            with httpx.Client(timeout=10) as client:
                reply = client.get(version_endpoint, params=query)
                reachable = reply.status_code == 200
            return reachable
        except Exception as e:
            self.logger.warning(f"ZAP service not available: {e}")
            return False

    def run(self, url: str) -> ScannerResult:
        """
        Execute the full ZAP workflow for a URL and collect the findings.

        The workflow accesses the URL, spiders it, runs an active scan,
        fetches the resulting alerts and converts them to a result.

        Args:
            url: The URL to scan

        Returns:
            ScannerResult with security findings, or the error result
            produced by ``_create_error_result`` when any stage raises.
        """
        self.logger.info(f"Starting ZAP scan for {url}")

        try:
            # Preparation stages, in order: populate the site tree,
            # crawl it (limited), then actively scan it.
            for stage in (self._access_url, self._spider_url, self._active_scan):
                stage(url)

            return self._parse_results(url, self._get_alerts(url))

        except httpx.TimeoutException:
            timeout_error = Exception("ZAP scan timed out")
            return self._create_error_result(timeout_error)
        except httpx.HTTPStatusError as http_error:
            status_code = http_error.response.status_code
            return self._create_error_result(
                Exception(f"ZAP service error: {status_code}")
            )
        except Exception as unexpected:
            return self._create_error_result(unexpected)

    def _zap_request(self, endpoint: str, params: Optional[Dict] = None) -> Dict:
        """
        Send a GET request to the ZAP API and return the decoded JSON body.

        Args:
            endpoint: API path appended verbatim to ``self.zap_host``
            params: Optional query parameters; the dict passed in is
                left unmodified

        Returns:
            The parsed JSON response

        Raises:
            httpx.HTTPStatusError: If ``raise_for_status`` rejects the
                response status.
        """
        # Build a fresh dict so the API key is never written into the
        # caller's params (which may be reused or logged by the caller).
        query = {**(params or {}), 'apikey': self.api_key}

        with httpx.Client(timeout=self.timeout) as client:
            response = client.get(
                f"{self.zap_host}{endpoint}",
                params=query
            )
            response.raise_for_status()
            return response.json()

    def _access_url(self, url: str) -> None:
        """
        Ask ZAP to fetch the URL (following redirects), then pause briefly.

        Args:
            url: The URL ZAP should access
        """
        self.logger.debug(f"Accessing URL in ZAP: {url}")

        access_params = {'url': url, 'followRedirects': 'true'}
        self._zap_request('/JSON/core/action/accessUrl/', access_params)

        # Fixed pause so ZAP has time to process the response
        time.sleep(2)

    def _spider_url(self, url: str) -> None:
        """
        Start a limited spider crawl of the URL and wait for it to finish.

        The crawl is restricted to the URL's subtree with at most five
        children. Polling stops after 60 seconds; a warning is logged
        if the spider has not reported completion by then.

        Args:
            url: The URL to crawl
        """
        self.logger.debug(f"Spidering URL: {url}")

        # Start spider
        result = self._zap_request(
            '/JSON/spider/action/scan/',
            {
                'url': url,
                'maxChildren': '5',  # Limited crawl
                'recurse': 'true',
                'subtreeOnly': 'true'
            }
        )

        scan_id = result.get('scan')
        if not scan_id:
            # No scan id in the response: nothing to wait for
            return

        spider_timeout = 60  # seconds
        # Monotonic clock: a wall-clock adjustment must not shorten or
        # extend the polling budget.
        deadline = time.monotonic() + spider_timeout
        while time.monotonic() < deadline:
            status = self._zap_request(
                '/JSON/spider/view/status/',
                {'scanId': scan_id}
            )
            # A missing status is treated as complete
            if int(status.get('status', '100')) >= 100:
                return
            time.sleep(2)

        self.logger.warning(
            f"ZAP spider did not finish within {spider_timeout}s for {url}; "
            "continuing with partial crawl"
        )

    def _active_scan(self, url: str) -> None:
        """
        Start an active scan of the URL and wait for it to finish.

        Polling stops after ``self.timeout`` seconds; a warning is logged
        if the scan has not reported completion by then.

        Args:
            url: The URL to scan
        """
        self.logger.debug(f"Starting active scan: {url}")

        # Start active scan
        result = self._zap_request(
            '/JSON/ascan/action/scan/',
            {
                'url': url,
                'recurse': 'true',
                'inScopeOnly': 'true'
            }
        )

        scan_id = result.get('scan')
        if not scan_id:
            # No scan id in the response: nothing to wait for
            return

        # Monotonic clock: a wall-clock adjustment must not shorten or
        # extend the polling budget.
        deadline = time.monotonic() + self.timeout
        while time.monotonic() < deadline:
            status = self._zap_request(
                '/JSON/ascan/view/status/',
                {'scanId': scan_id}
            )
            # A missing status is treated as complete
            if int(status.get('status', '100')) >= 100:
                return
            time.sleep(5)

        self.logger.warning(
            f"ZAP active scan did not finish within {self.timeout}s for {url}; "
            "alerts may be incomplete"
        )

    def _get_alerts(self, url: str) -> List[Dict]:
        """
        Fetch the alerts ZAP has recorded under the given base URL.

        Args:
            url: Base URL used to filter the alerts

        Returns:
            The 'alerts' list from the response (at most 100 entries are
            requested), or an empty list when the key is absent.
        """
        self.logger.debug(f"Fetching alerts for: {url}")

        # Request only the first page of up to 100 alerts
        query = {'baseurl': url, 'start': '0', 'count': '100'}
        response = self._zap_request('/JSON/core/view/alerts/', query)

        return response.get('alerts', [])

    def _parse_results(self, url: str, alerts: List[Dict]) -> ScannerResult:
        """
        Parse ZAP alerts into ScannerResult format.

        Every alert becomes one IssueData. Alerts are also tallied per
        risk level; each non-zero tally and the overall total are
        reported as MetricData.

        Args:
            url: The scanned URL, used as the affected URL for alerts
                that do not carry their own
            alerts: List of ZAP alerts

        Returns:
            Parsed ScannerResult with SUCCESS status
        """
        issues = []
        metrics = []

        # Count alerts by risk level
        risk_counts = {
            'High': 0,
            'Medium': 0,
            'Low': 0,
            'Informational': 0
        }

        for alert in alerts:
            risk = alert.get('risk', 'Informational')
            # Unknown risk labels get their own counter
            risk_counts[risk] = risk_counts.get(risk, 0) + 1

            severity = self._map_risk_to_severity(risk)

            # 'evidence' may be present but null; fall back to '' so the
            # truncation below cannot raise TypeError.
            evidence = alert.get('evidence') or ''

            issues.append(IssueData(
                category='security',
                severity=severity,
                title=alert.get('name', 'Unknown vulnerability'),
                description=self._format_description(alert),
                tool='owasp_zap',
                affected_url=alert.get('url', url),
                remediation=alert.get('solution', 'Review and fix the vulnerability.'),
                raw_data={
                    'alert_ref': alert.get('alertRef'),
                    'cweid': alert.get('cweid'),
                    'wascid': alert.get('wascid'),
                    'confidence': alert.get('confidence'),
                    'evidence': evidence[:500],  # Truncate evidence
                }
            ))

        # Create metrics for vulnerability counts (non-zero levels only)
        for risk_level, count in risk_counts.items():
            if count > 0:
                metrics.append(MetricData(
                    name=f'zap_{risk_level.lower()}_alerts',
                    display_name=f'{risk_level} Risk Alerts',
                    value=float(count),
                    unit='count',
                    source='owasp_zap'
                ))

        # The total is always reported, even when it is zero
        metrics.append(MetricData(
            name='total_security_alerts',
            display_name='Total Security Alerts',
            value=float(len(alerts)),
            unit='count',
            source='owasp_zap'
        ))

        self.logger.info(
            f"ZAP scan complete: {len(alerts)} alerts "
            f"(High: {risk_counts['High']}, Medium: {risk_counts['Medium']}, "
            f"Low: {risk_counts['Low']})"
        )

        return ScannerResult(
            scanner_name=self.name,
            status=ScannerStatus.SUCCESS,
            issues=issues,
            metrics=metrics,
            raw_data={
                'total_alerts': len(alerts),
                'risk_counts': risk_counts,
                'alerts': alerts[:50]  # Store limited raw alerts
            }
        )

    def _map_risk_to_severity(self, risk: str) -> str:
        """Map ZAP risk level to our severity."""
        mapping = {
            'High': 'high',
            'Medium': 'medium',
            'Low': 'low',
            'Informational': 'info',
        }
        return mapping.get(risk, 'info')

    def _format_description(self, alert: Dict) -> str:
        """Format ZAP alert into readable description."""
        parts = []

        if alert.get('description'):
            parts.append(alert['description'])

        if alert.get('attack'):
            parts.append(f"\nAttack: {alert['attack']}")

        if alert.get('evidence'):
            evidence = alert['evidence'][:200]
            parts.append(f"\nEvidence: {evidence}")

        if alert.get('reference'):
            parts.append(f"\nReference: {alert['reference']}")

        return '\n'.join(parts)