# secure-web/backend/scanner/scanners/runner.py
#
# 315 lines
# 11 KiB
# Python

"""
Scan Runner - Orchestrates multiple scanners.
This module coordinates running all enabled scanners against a URL
and aggregates their results into a unified report.
"""
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any, Dict, List, Optional, Type
from django.conf import settings
from .base import BaseScanner, ScannerResult, ScannerStatus
from .lighthouse import LighthouseScanner
from .playwright_scanner import PlaywrightScanner
from .zap import ZAPScanner
from .headers import HeaderScanner
from .tls import TLSScanner
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Scanner classes ScanRunner uses when the caller does not supply its own.
DEFAULT_SCANNERS: List[Type[BaseScanner]] = [
    LighthouseScanner,
    PlaywrightScanner,
    ZAPScanner,
    HeaderScanner,
    TLSScanner,
]
class ScanRunner:
    """
    Orchestrates running multiple scanners and aggregating results.

    This class manages:
    - Instantiating the configured scanner classes
    - Running the scanners in parallel (thread pool) or in sequence
    - Aggregating results from all scanners into one report dict
    - Error handling, so one failing scanner does not abort the others

    The runner does not enforce any timeout itself; timeout values are only
    handed to individual scanners through their config.
    """

    def __init__(
        self,
        scanner_classes: Optional[List[Type[BaseScanner]]] = None,
        config: Optional[Dict[str, Any]] = None,
        max_workers: int = 3
    ):
        """
        Initialize the scan runner.

        Args:
            scanner_classes: Scanner classes to use. ``None`` or an empty
                list selects ``DEFAULT_SCANNERS``.
            config: Base configuration dict copied into every scanner's
                config. ``None`` means an empty dict.
            max_workers: Maximum concurrent scanner threads for parallel runs.
        """
        self.scanner_classes = scanner_classes or DEFAULT_SCANNERS
        self.config = config or {}
        self.max_workers = max_workers
        self.logger = logging.getLogger(__name__)

    def run(self, url: str, parallel: bool = True) -> Dict[str, Any]:
        """
        Run all scanners against the URL.

        Args:
            url: The URL to scan.
            parallel: Whether to run scanners in a thread pool (True) or one
                after another (False).

        Returns:
            Aggregated results dictionary containing:
            - status: Overall scan status ('done', 'partial' or 'failed')
            - scores: Aggregated scores
            - issues: All issues from all scanners
            - metrics: All metrics from all scanners
            - scanner_results: Per-scanner status and error message
            - raw_data: Per-scanner raw data, where a scanner provided any
            - errors: Error messages of failed scanners
            - summary: Scanner, issue and metric counts
        """
        self.logger.info(f"Starting scan runner for {url} with {len(self.scanner_classes)} scanners")

        # Scanners that fail to initialize are logged and left out of the run.
        scanners = self._initialize_scanners()

        if parallel:
            results = self._run_parallel(scanners, url)
        else:
            results = self._run_sequential(scanners, url)

        aggregated = self._aggregate_results(results)
        self.logger.info(
            f"Scan complete: {len(aggregated['issues'])} issues, "
            f"{len(aggregated['metrics'])} metrics, "
            f"status: {aggregated['status']}"
        )
        return aggregated

    def _initialize_scanners(self) -> List[BaseScanner]:
        """
        Instantiate every configured scanner class.

        Each scanner receives its own copy of ``self.config``, extended with
        scanner-specific values read from ``settings.SCANNER_CONFIG``.

        Returns:
            The successfully created scanner instances. A class whose
            construction raises is logged and omitted.
        """
        scanners = []
        scanner_config = settings.SCANNER_CONFIG

        for scanner_class in self.scanner_classes:
            try:
                # Fresh copy per scanner so the additions below never leak
                # into self.config or into another scanner's config.
                config = {**self.config}

                # Values taken from settings overwrite same-named keys that
                # the caller put in the base config.
                if scanner_class == LighthouseScanner:
                    # Only a default: a caller-supplied service_url is kept
                    # instead of being silently replaced by this constant.
                    config.setdefault('service_url', 'http://lighthouse:3001')
                    config['timeout'] = scanner_config.get('LIGHTHOUSE_TIMEOUT', 60)
                elif scanner_class == ZAPScanner:
                    config['zap_host'] = scanner_config.get('ZAP_HOST')
                    config['api_key'] = scanner_config.get('ZAP_API_KEY')
                    config['timeout'] = scanner_config.get('ZAP_TIMEOUT', 120)
                elif scanner_class == PlaywrightScanner:
                    config['timeout'] = scanner_config.get('PLAYWRIGHT_TIMEOUT', 30000)
                    config['viewport'] = scanner_config.get(
                        'PLAYWRIGHT_VIEWPORT', {'width': 1920, 'height': 1080}
                    )

                scanners.append(scanner_class(config=config))
            except Exception as e:
                # Best effort: one broken scanner must not block the others.
                # logger.exception keeps the traceback for diagnosis.
                self.logger.exception(f"Failed to initialize {scanner_class.__name__}: {e}")

        return scanners

    def _run_parallel(
        self,
        scanners: List[BaseScanner],
        url: str
    ) -> Dict[str, ScannerResult]:
        """
        Run scanners concurrently in a thread pool.

        Args:
            scanners: Scanner instances to run.
            url: The URL passed to every scanner.

        Returns:
            Mapping of scanner name to its result. Insertion order follows
            completion order, not submission order.
        """
        results = {}

        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            future_to_scanner = {
                executor.submit(self._run_scanner, scanner, url): scanner
                for scanner in scanners
            }

            for future in as_completed(future_to_scanner):
                scanner = future_to_scanner[future]
                try:
                    results[scanner.name] = future.result()
                except Exception as e:
                    # _run_scanner already converts exceptions into FAILED
                    # results; this is a second safety net around the future.
                    self.logger.exception(f"Scanner {scanner.name} raised exception: {e}")
                    results[scanner.name] = ScannerResult(
                        scanner_name=scanner.name,
                        status=ScannerStatus.FAILED,
                        error_message=str(e)
                    )

        return results

    def _run_sequential(
        self,
        scanners: List[BaseScanner],
        url: str
    ) -> Dict[str, ScannerResult]:
        """
        Run scanners one after another, in list order.

        Args:
            scanners: Scanner instances to run.
            url: The URL passed to every scanner.

        Returns:
            Mapping of scanner name to its result.
        """
        results = {}
        for scanner in scanners:
            result = self._run_scanner(scanner, url)
            results[scanner.name] = result
        return results

    def _run_scanner(self, scanner: BaseScanner, url: str) -> ScannerResult:
        """
        Run a single scanner with error handling.

        Args:
            scanner: The scanner instance to run.
            url: The URL to scan.

        Returns:
            The scanner's own result; a SKIPPED result when
            ``scanner.is_available()`` is falsy; or a FAILED result carrying
            the exception text when the availability check or the run raises.
        """
        self.logger.info(f"Running scanner: {scanner.name}")
        try:
            if not scanner.is_available():
                self.logger.warning(f"Scanner {scanner.name} is not available")
                return ScannerResult(
                    scanner_name=scanner.name,
                    status=ScannerStatus.SKIPPED,
                    error_message=f"{scanner.name} service is not available"
                )

            result = scanner.run(url)
            self.logger.info(
                f"Scanner {scanner.name} completed with status: {result.status}"
            )
            return result
        except Exception as e:
            # Deliberately broad: any scanner error becomes a FAILED result
            # so the remaining scanners still run. Traceback is logged.
            self.logger.exception(f"Scanner {scanner.name} failed: {e}")
            return ScannerResult(
                scanner_name=scanner.name,
                status=ScannerStatus.FAILED,
                error_message=str(e)
            )

    def _aggregate_results(
        self,
        results: Dict[str, ScannerResult]
    ) -> Dict[str, Any]:
        """
        Aggregate results from all scanners into one report dict.

        Overall status:
        - 'failed' when no scanner succeeded (all failed, all skipped, or no
          results at all)
        - 'partial' when at least one scanner succeeded and at least one failed
        - 'done' otherwise

        Args:
            results: Mapping of scanner name to its result.

        Returns:
            Dict with the keys 'status', 'scores', 'issues', 'metrics',
            'scanner_results', 'raw_data', 'errors' and 'summary'.
        """
        all_issues: List[Dict[str, Any]] = []
        all_metrics: List[Dict[str, Any]] = []
        all_scores: Dict[str, Any] = {}
        raw_data: Dict[str, Any] = {}
        errors: List[Dict[str, Any]] = []
        successful_scanners = 0
        failed_scanners = 0
        skipped_scanners = 0

        for scanner_name, result in results.items():
            status = result.status
            if status in (ScannerStatus.SUCCESS, ScannerStatus.PARTIAL):
                # A partial result still delivered data, so it counts as a
                # success for the overall status.
                successful_scanners += 1
            elif status == ScannerStatus.FAILED:
                failed_scanners += 1
                if result.error_message:
                    errors.append({
                        'scanner': scanner_name,
                        'error': result.error_message
                    })
            elif status == ScannerStatus.SKIPPED:
                skipped_scanners += 1

            # `or ()` tolerates a result whose issues/metrics are None.
            all_issues.extend(
                {
                    'category': issue.category,
                    'severity': issue.severity,
                    'title': issue.title,
                    'description': issue.description,
                    'tool': issue.tool,
                    'affected_url': issue.affected_url,
                    'remediation': issue.remediation,
                    'raw_data': issue.raw_data,
                }
                for issue in (result.issues or ())
            )
            all_metrics.extend(
                {
                    'name': metric.name,
                    'display_name': metric.display_name,
                    'value': metric.value,
                    'unit': metric.unit,
                    'source': metric.source,
                    'score': metric.score,
                }
                for metric in (result.metrics or ())
            )

            if result.scores:
                all_scores[scanner_name] = result.scores
            if result.raw_data:
                raw_data[scanner_name] = result.raw_data

        # A run in which nothing succeeded must not be reported as 'done',
        # even when the scanners were merely skipped rather than failed.
        if successful_scanners == 0:
            overall_status = 'failed'
        elif failed_scanners > 0:
            overall_status = 'partial'
        else:
            overall_status = 'done'

        aggregated_scores = self._calculate_aggregated_scores(all_scores)

        return {
            'status': overall_status,
            'scores': aggregated_scores,
            'issues': all_issues,
            'metrics': all_metrics,
            'scanner_results': {
                name: {
                    'status': result.status.value,
                    'error': result.error_message,
                }
                for name, result in results.items()
            },
            'raw_data': raw_data,
            'errors': errors,
            'summary': {
                'total_scanners': len(results),
                'successful': successful_scanners,
                'failed': failed_scanners,
                'skipped': skipped_scanners,
                'total_issues': len(all_issues),
                'total_metrics': len(all_metrics),
            }
        }

    def _calculate_aggregated_scores(
        self,
        scanner_scores: Dict[str, Dict[str, int]]
    ) -> Dict[str, Optional[int]]:
        """
        Calculate aggregated scores from all scanners.

        Only the entry stored under the 'lighthouse' key is used.

        Args:
            scanner_scores: Mapping of scanner name to that scanner's scores.

        Returns:
            Dict with 'performance', 'accessibility', 'best_practices' and
            'seo'; a score is None when the lighthouse entry lacks it.
        """
        # NOTE(review): assumes LighthouseScanner reports under the name
        # 'lighthouse' - confirm against the scanner's `name`.
        lighthouse_scores = scanner_scores.get('lighthouse', {})
        return {
            key: lighthouse_scores.get(key)
            for key in ('performance', 'accessibility', 'best_practices', 'seo')
        }