# secure-web/backend/scanner/scanners/base.py

"""
Base scanner interface and result structures.

All scanner implementations should inherit from BaseScanner
and return ScannerResult objects.
"""

import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
from enum import Enum

logger = logging.getLogger(__name__)


class ScannerStatus(str, Enum):
    """
    Outcome reported for one scanner execution.

    The ``str`` mixin makes every member a real string, so a member
    compares equal to its lowercase value (``ScannerStatus.FAILED == "failed"``)
    and can be looked up from that value (``ScannerStatus("failed")``).
    """

    # Declaration order is the iteration order of the enum.
    SUCCESS = "success"
    PARTIAL = "partial"
    FAILED = "failed"
    SKIPPED = "skipped"


@dataclass
class IssueData:
    """
    One finding reported by a scanner.

    The first five fields are required; the remaining three are optional
    and default to ``None``.

    Attributes:
        category: Issue category (security, performance, etc.)
        severity: Issue severity (critical, high, medium, low, info)
        title: Short headline for the issue
        description: Longer explanation of the issue
        tool: Name of the scanner that reported it
        affected_url: The specific URL concerned, when known
        remediation: Suggested fix, when one is available
        raw_data: The scanner's original data for this issue, when kept
    """

    # Required fields.
    category: str
    severity: str
    title: str
    description: str
    tool: str

    # Optional details; each is None unless the scanner supplies it.
    affected_url: Optional[str] = field(default=None)
    remediation: Optional[str] = field(default=None)
    raw_data: Optional[Dict[str, Any]] = field(default=None)


@dataclass
class MetricData:
    """
    One measurement reported by a scanner.

    Attributes:
        name: Internal identifier (e.g., 'first_contentful_paint_ms')
        display_name: Label suitable for showing to people
        value: The measured number
        unit: Unit the value is expressed in
        source: Name of the scanner that took the measurement
        score: Normalized score (0-1); None when not available
    """

    # Required fields.
    name: str
    display_name: str
    value: float
    unit: str
    source: str

    # Optional normalized score; None unless the scanner supplies one.
    score: Optional[float] = field(default=None)


@dataclass
class ScannerResult:
    """
    Everything a scanner reports back from one execution.

    Only ``scanner_name`` and ``status`` are required. The collection
    fields start out empty (a fresh list/dict per instance) and the
    remaining fields default to ``None``.

    Attributes:
        scanner_name: Name of the scanner that produced the result
        status: Execution status
        issues: Issues found during the scan
        metrics: Metrics measured during the scan
        scores: Category scores keyed by category name
        raw_data: Original scanner output, when kept
        error_message: Error details when the scan failed
    """

    # Required fields.
    scanner_name: str
    status: ScannerStatus

    # Collections use default_factory so instances never share a container.
    issues: List[IssueData] = field(default_factory=list)
    metrics: List[MetricData] = field(default_factory=list)
    scores: Dict[str, int] = field(default_factory=dict)

    # Optional extras; None unless the scanner supplies them.
    raw_data: Optional[Dict[str, Any]] = field(default=None)
    error_message: Optional[str] = field(default=None)


class BaseScanner(ABC):
    """
    Abstract base class for all scanners.

    Each scanner implementation must implement the `run` method
    which performs the actual scan and returns a ScannerResult.
    The class attribute `name` identifies the scanner: it is used for
    the per-scanner logger and for the results built by
    `_create_error_result`.
    """

    name: str = "base"

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Initialize the scanner with optional configuration.

        Args:
            config: Scanner-specific configuration dictionary. A missing
                or empty value results in an empty dict on ``self.config``.
        """
        self.config = config or {}
        # One logger per scanner name, under the shared "scanner." prefix.
        self.logger = logging.getLogger(f"scanner.{self.name}")

    @abstractmethod
    def run(self, url: str) -> ScannerResult:
        """
        Run the scanner against the given URL.

        Args:
            url: The URL to scan

        Returns:
            ScannerResult with findings, metrics, and status
        """

    def is_available(self) -> bool:
        """
        Check if the scanner service/tool is available.

        The base implementation always reports availability.

        Returns:
            True if the scanner can be used, False otherwise
        """
        return True

    def _create_error_result(self, error: Exception) -> ScannerResult:
        """
        Create a failed result from an exception.

        The failure is logged at ERROR level and reported both through
        ``error_message`` and as a single informational issue.

        Args:
            error: The exception that occurred

        Returns:
            ScannerResult with failed status
        """
        # Exceptions raised without a message (e.g. ``TimeoutError()``)
        # stringify to "". Fall back to the exception class name so the
        # error message and issue description are never left blank.
        detail = str(error) or type(error).__name__

        # Lazy %-style arguments: formatting only happens if the record
        # is actually emitted.
        self.logger.error("Scanner %s failed: %s", self.name, detail)

        failure_issue = IssueData(
            category="scanner",
            severity="info",
            title=f"{self.name.title()} scan failed",
            description=f"The {self.name} scanner encountered an error: {detail}",
            tool=self.name,
            remediation="Check scanner service configuration and availability.",
        )
        return ScannerResult(
            scanner_name=self.name,
            status=ScannerStatus.FAILED,
            error_message=detail,
            issues=[failure_issue],
        )