"""
Scan Runner - Orchestrates multiple scanners.

This module coordinates running all enabled scanners against a URL
and aggregates their results into a unified report.
"""

import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any, Dict, List, Optional, Type

from django.conf import settings

from .base import BaseScanner, ScannerResult, ScannerStatus
from .lighthouse import LighthouseScanner
from .playwright_scanner import PlaywrightScanner
from .zap import ZAPScanner
from .headers import HeaderScanner
from .tls import TLSScanner

logger = logging.getLogger(__name__)

# Default scanner classes to run
DEFAULT_SCANNERS: List[Type[BaseScanner]] = [
    LighthouseScanner,
    PlaywrightScanner,
    ZAPScanner,
    HeaderScanner,
    TLSScanner,
]


class ScanRunner:
    """
    Orchestrates running multiple scanners and aggregating results.

    This class manages:
    - Instantiating the configured scanner classes
    - Running the scanners in parallel (thread pool) or in sequence
    - Aggregating issues, metrics, scores and raw data from all scanners
    - Error handling, so one failing scanner still yields partial results
    - Passing per-scanner timeout settings through to the scanners
    """

    def __init__(
        self,
        scanner_classes: Optional[List[Type[BaseScanner]]] = None,
        config: Optional[Dict[str, Any]] = None,
        max_workers: int = 3
    ):
        """
        Initialize the scan runner.

        Args:
            scanner_classes: List of scanner classes to use. ``None`` or an
                empty list selects ``DEFAULT_SCANNERS``.
            config: Configuration dict passed to each scanner
            max_workers: Maximum concurrent scanner threads
        """
        # Store a copy: mutating self.scanner_classes later must not alter
        # the module-level DEFAULT_SCANNERS (or the caller's own list).
        self.scanner_classes = list(scanner_classes or DEFAULT_SCANNERS)
        self.config = config or {}
        self.max_workers = max_workers
        self.logger = logging.getLogger(__name__)

    def run(self, url: str, parallel: bool = True) -> Dict[str, Any]:
        """
        Run all scanners against the URL.

        Args:
            url: The URL to scan
            parallel: Whether to run scanners in parallel

        Returns:
            Aggregated results dictionary containing:
            - status: Overall scan status ('done', 'partial' or 'failed')
            - scores: Aggregated scores
            - issues: All issues from all scanners
            - metrics: All metrics from all scanners
            - scanner_results: Status and error message per scanner
            - raw_data: Raw data per scanner (only scanners that produced any)
            - errors: Error messages of failed scanners
            - summary: Scanner, issue and metric counts
        """
        self.logger.info(f"Starting scan runner for {url} with {len(self.scanner_classes)} scanners")

        # Initialize scanners
        scanners = self._initialize_scanners()

        # Run scanners
        if parallel:
            results = self._run_parallel(scanners, url)
        else:
            results = self._run_sequential(scanners, url)

        # Aggregate results
        aggregated = self._aggregate_results(results)

        self.logger.info(
            f"Scan complete: {len(aggregated['issues'])} issues, "
            f"{len(aggregated['metrics'])} metrics, "
            f"status: {aggregated['status']}"
        )

        return aggregated

    def _initialize_scanners(self) -> List[BaseScanner]:
        """
        Instantiate every configured scanner class.

        Each scanner receives ``self.config`` overlaid with its
        scanner-specific settings. A class whose construction raises is
        logged and left out, so the remaining scanners still run.

        Returns:
            The successfully constructed scanner instances, in the order of
            ``self.scanner_classes``.
        """
        scanners: List[BaseScanner] = []
        # Every lookup below already has a fallback, so a project that does
        # not define SCANNER_CONFIG should get the defaults, not a crash.
        scanner_config = getattr(settings, 'SCANNER_CONFIG', None) or {}

        for scanner_class in self.scanner_classes:
            try:
                # Scanner-specific keys take precedence over the shared config
                config = {
                    **self.config,
                    **self._scanner_specific_config(scanner_class, scanner_config),
                }
                scanners.append(scanner_class(config=config))
            except Exception as e:
                # logger.exception keeps the traceback for diagnosis
                self.logger.exception(f"Failed to initialize {scanner_class.__name__}: {e}")

        return scanners

    @staticmethod
    def _scanner_specific_config(
        scanner_class: Type[BaseScanner],
        scanner_config: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Return the extra config keys for one scanner class (empty if none)."""
        if scanner_class is LighthouseScanner:
            return {
                'service_url': 'http://lighthouse:3001',
                'timeout': scanner_config.get('LIGHTHOUSE_TIMEOUT', 60),
            }
        if scanner_class is ZAPScanner:
            return {
                'zap_host': scanner_config.get('ZAP_HOST'),
                'api_key': scanner_config.get('ZAP_API_KEY'),
                'timeout': scanner_config.get('ZAP_TIMEOUT', 120),
            }
        if scanner_class is PlaywrightScanner:
            return {
                'timeout': scanner_config.get('PLAYWRIGHT_TIMEOUT', 30000),
                'viewport': scanner_config.get(
                    'PLAYWRIGHT_VIEWPORT', {'width': 1920, 'height': 1080}
                ),
            }
        return {}

    def _run_parallel(
        self,
        scanners: List[BaseScanner],
        url: str
    ) -> Dict[str, ScannerResult]:
        """
        Run scanners in parallel using thread pool.

        Args:
            scanners: Scanner instances to run
            url: The URL to scan

        Returns:
            Mapping of scanner name to its result, keyed in the order the
            scanners were given (not the order in which they finished).
        """
        completed: Dict[str, ScannerResult] = {}

        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            # Submit all scanner tasks
            future_to_scanner = {
                executor.submit(self._run_scanner, scanner, url): scanner
                for scanner in scanners
            }

            # Collect results as they complete
            for future in as_completed(future_to_scanner):
                scanner = future_to_scanner[future]
                try:
                    completed[scanner.name] = future.result()
                except Exception as e:
                    self.logger.exception(f"Scanner {scanner.name} raised exception: {e}")
                    completed[scanner.name] = ScannerResult(
                        scanner_name=scanner.name,
                        status=ScannerStatus.FAILED,
                        error_message=str(e)
                    )

        # Re-key in submission order so the aggregated issue/metric order does
        # not depend on thread timing and matches the sequential mode.
        return {scanner.name: completed[scanner.name] for scanner in scanners}

    def _run_sequential(
        self,
        scanners: List[BaseScanner],
        url: str
    ) -> Dict[str, ScannerResult]:
        """
        Run scanners sequentially.

        Args:
            scanners: Scanner instances to run
            url: The URL to scan

        Returns:
            Mapping of scanner name to its result, in scanner order.
        """
        results: Dict[str, ScannerResult] = {}

        for scanner in scanners:
            results[scanner.name] = self._run_scanner(scanner, url)

        return results

    def _run_scanner(self, scanner: BaseScanner, url: str) -> ScannerResult:
        """
        Run a single scanner with error handling.

        Args:
            scanner: The scanner instance to run
            url: The URL to scan

        Returns:
            The scanner's own result; a SKIPPED result when
            ``scanner.is_available()`` is falsy; or a FAILED result carrying
            the exception text when the availability check or the run raises.
        """
        self.logger.info(f"Running scanner: {scanner.name}")

        try:
            # Check availability first
            if not scanner.is_available():
                self.logger.warning(f"Scanner {scanner.name} is not available")
                return ScannerResult(
                    scanner_name=scanner.name,
                    status=ScannerStatus.SKIPPED,
                    error_message=f"{scanner.name} service is not available"
                )

            # Run the scanner
            result = scanner.run(url)
            self.logger.info(
                f"Scanner {scanner.name} completed with status: {result.status}"
            )
            return result

        except Exception as e:
            # logger.exception keeps the traceback for diagnosis
            self.logger.exception(f"Scanner {scanner.name} failed: {e}")
            return ScannerResult(
                scanner_name=scanner.name,
                status=ScannerStatus.FAILED,
                error_message=str(e)
            )

    @staticmethod
    def _serialize_issue(issue: Any) -> Dict[str, Any]:
        """Convert one scanner issue object into a plain dict."""
        return {
            'category': issue.category,
            'severity': issue.severity,
            'title': issue.title,
            'description': issue.description,
            'tool': issue.tool,
            'affected_url': issue.affected_url,
            'remediation': issue.remediation,
            'raw_data': issue.raw_data,
        }

    @staticmethod
    def _serialize_metric(metric: Any) -> Dict[str, Any]:
        """Convert one scanner metric object into a plain dict."""
        return {
            'name': metric.name,
            'display_name': metric.display_name,
            'value': metric.value,
            'unit': metric.unit,
            'source': metric.source,
            'score': metric.score,
        }

    def _aggregate_results(
        self,
        results: Dict[str, ScannerResult]
    ) -> Dict[str, Any]:
        """
        Aggregate results from all scanners.

        Issues, metrics, scores and raw data are collected from every result
        regardless of its status.

        Args:
            results: Mapping of scanner name to that scanner's result

        Returns:
            The unified report dict with the keys ``status``, ``scores``,
            ``issues``, ``metrics``, ``scanner_results``, ``raw_data``,
            ``errors`` and ``summary``.
        """
        all_issues: List[Dict[str, Any]] = []
        all_metrics: List[Dict[str, Any]] = []
        all_scores: Dict[str, Any] = {}
        raw_data: Dict[str, Any] = {}
        errors: List[Dict[str, Any]] = []

        successful_scanners = 0
        failed_scanners = 0

        for scanner_name, result in results.items():
            # Track scanner status. SUCCESS and PARTIAL both count as
            # successful; any other status (e.g. SKIPPED) is counted in
            # neither bucket.
            if result.status in (ScannerStatus.SUCCESS, ScannerStatus.PARTIAL):
                successful_scanners += 1
            elif result.status == ScannerStatus.FAILED:
                failed_scanners += 1
                if result.error_message:
                    errors.append({
                        'scanner': scanner_name,
                        'error': result.error_message
                    })

            # Collect issues
            all_issues.extend(self._serialize_issue(issue) for issue in result.issues)

            # Collect metrics
            all_metrics.extend(self._serialize_metric(metric) for metric in result.metrics)

            # Collect scores
            if result.scores:
                all_scores[scanner_name] = result.scores

            # Store raw data
            if result.raw_data:
                raw_data[scanner_name] = result.raw_data

        # Determine overall status. With no results at all the first
        # comparison is 0 == 0, so an empty run is reported as 'failed'.
        if failed_scanners == len(results):
            overall_status = 'failed'
        elif failed_scanners > 0:
            overall_status = 'partial'
        else:
            overall_status = 'done'

        # Calculate aggregated scores
        aggregated_scores = self._calculate_aggregated_scores(all_scores)

        return {
            'status': overall_status,
            'scores': aggregated_scores,
            'issues': all_issues,
            'metrics': all_metrics,
            'scanner_results': {
                name: {
                    'status': result.status.value,
                    'error': result.error_message,
                }
                for name, result in results.items()
            },
            'raw_data': raw_data,
            'errors': errors,
            'summary': {
                'total_scanners': len(results),
                'successful': successful_scanners,
                'failed': failed_scanners,
                'total_issues': len(all_issues),
                'total_metrics': len(all_metrics),
            }
        }

    def _calculate_aggregated_scores(
        self,
        scanner_scores: Dict[str, Dict[str, int]]
    ) -> Dict[str, Optional[int]]:
        """
        Calculate aggregated scores from all scanners.

        Only the entry stored under the ``'lighthouse'`` key is consulted.

        Args:
            scanner_scores: Mapping of scanner name to that scanner's scores

        Returns:
            Dict with the keys ``performance``, ``accessibility``,
            ``best_practices`` and ``seo``; a value is ``None`` when the
            lighthouse scores do not contain that key.
        """
        # Lighthouse provides the main scores
        lighthouse_scores = scanner_scores.get('lighthouse', {})

        return {
            category: lighthouse_scores.get(category)
            for category in ('performance', 'accessibility', 'best_practices', 'seo')
        }