315 lines
11 KiB
Python
315 lines
11 KiB
Python
"""
Scan Runner - Orchestrates multiple scanners.

This module coordinates running all enabled scanners against a URL
and aggregates their results into a unified report.
"""
|
|
|
|
import logging
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
from typing import Any, Dict, List, Optional, Type
|
|
|
|
from django.conf import settings
|
|
|
|
from .base import BaseScanner, ScannerResult, ScannerStatus
|
|
from .lighthouse import LighthouseScanner
|
|
from .playwright_scanner import PlaywrightScanner
|
|
from .zap import ZAPScanner
|
|
from .headers import HeaderScanner
|
|
from .tls import TLSScanner
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# Scanner classes that are run, in this order, when no explicit
# selection is supplied.
DEFAULT_SCANNERS: List[Type[BaseScanner]] = [
    LighthouseScanner,
    PlaywrightScanner,
    ZAPScanner,
    HeaderScanner,
    TLSScanner,
]
|
|
|
|
|
|
class ScanRunner:
    """
    Orchestrates running multiple scanners and aggregating results.

    This class manages:
    - Instantiating the selected scanner classes with their configuration
      (including the per-scanner timeout settings)
    - Running the scanners in parallel or in sequence
    - Aggregating results from all scanners into one report dictionary
    - Error handling, so that one failing scanner still yields a partial report
    """

    def __init__(
        self,
        scanner_classes: Optional[List[Type[BaseScanner]]] = None,
        config: Optional[Dict[str, Any]] = None,
        max_workers: int = 3
    ):
        """
        Initialize the scan runner.

        Args:
            scanner_classes: List of scanner classes to use. ``None`` or an
                empty list selects ``DEFAULT_SCANNERS``.
            config: Configuration dict passed to each scanner
            max_workers: Maximum concurrent scanner threads
        """
        # Copy both containers: mutating runner state must never leak into
        # the module-level DEFAULT_SCANNERS list or the caller's own objects.
        self.scanner_classes = list(scanner_classes or DEFAULT_SCANNERS)
        self.config = dict(config or {})
        self.max_workers = max_workers
        self.logger = logging.getLogger(__name__)

    def run(self, url: str, parallel: bool = True) -> Dict[str, Any]:
        """
        Run all scanners against the URL.

        Args:
            url: The URL to scan
            parallel: Whether to run scanners in parallel

        Returns:
            Aggregated results dictionary containing:
            - status: Overall scan status ('done', 'partial' or 'failed')
            - scores: Aggregated scores
            - issues: All issues from all scanners
            - metrics: All metrics from all scanners
            - scanner_results: Individual scanner status and error message
            - raw_data: Raw data per scanner, where a scanner provided any
            - errors: Error messages of failed scanners
            - summary: Counters for scanners, issues and metrics
        """
        self.logger.info(
            "Starting scan runner for %s with %s scanners",
            url,
            len(self.scanner_classes),
        )

        scanners = self._initialize_scanners()

        if parallel:
            results = self._run_parallel(scanners, url)
        else:
            results = self._run_sequential(scanners, url)

        aggregated = self._aggregate_results(results)

        self.logger.info(
            "Scan complete: %s issues, %s metrics, status: %s",
            len(aggregated['issues']),
            len(aggregated['metrics']),
            aggregated['status'],
        )

        return aggregated

    def _initialize_scanners(self) -> List[BaseScanner]:
        """
        Instantiate every configured scanner class.

        Each scanner receives a copy of the runner-level config, extended
        with scanner-specific values read from ``settings.SCANNER_CONFIG``.
        A scanner whose construction raises is logged and left out; the
        remaining scanners are still returned.
        """
        scanners: List[BaseScanner] = []
        # A missing SCANNER_CONFIG setting must not abort the whole run;
        # every lookup below already has a usable fallback.
        scanner_config = getattr(settings, 'SCANNER_CONFIG', None) or {}

        for scanner_class in self.scanner_classes:
            try:
                # Start from the runner-level config; scanner-specific
                # values below take precedence over it.
                config = {**self.config}

                if scanner_class is LighthouseScanner:
                    # LIGHTHOUSE_URL is optional; the default is the address
                    # that was previously hard-coded here.
                    config['service_url'] = scanner_config.get(
                        'LIGHTHOUSE_URL', 'http://lighthouse:3001'
                    )
                    config['timeout'] = scanner_config.get('LIGHTHOUSE_TIMEOUT', 60)
                elif scanner_class is ZAPScanner:
                    config['zap_host'] = scanner_config.get('ZAP_HOST')
                    config['api_key'] = scanner_config.get('ZAP_API_KEY')
                    config['timeout'] = scanner_config.get('ZAP_TIMEOUT', 120)
                elif scanner_class is PlaywrightScanner:
                    config['timeout'] = scanner_config.get('PLAYWRIGHT_TIMEOUT', 30000)
                    config['viewport'] = scanner_config.get(
                        'PLAYWRIGHT_VIEWPORT', {'width': 1920, 'height': 1080}
                    )

                scanners.append(scanner_class(config=config))

            except Exception as e:
                # Best effort: keep going with the scanners that did
                # initialize, but keep the traceback for diagnosis.
                self.logger.exception(
                    "Failed to initialize %s: %s", scanner_class.__name__, e
                )

        return scanners

    def _run_parallel(
        self,
        scanners: List[BaseScanner],
        url: str
    ) -> Dict[str, ScannerResult]:
        """
        Run scanners concurrently on a thread pool.

        Results are keyed by ``scanner.name`` and inserted in the order the
        scanners were given (not in completion order), so the aggregated
        report is ordered the same way as in sequential mode.
        """
        results: Dict[str, ScannerResult] = {}
        if not scanners:
            return results

        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            submitted = [
                (scanner, executor.submit(self._run_scanner, scanner, url))
                for scanner in scanners
            ]

            # Waiting in submission order costs nothing: the executor is
            # drained before the ``with`` block exits either way.
            for scanner, future in submitted:
                try:
                    results[scanner.name] = future.result()
                except Exception as e:
                    self.logger.exception(
                        "Scanner %s raised exception: %s", scanner.name, e
                    )
                    results[scanner.name] = ScannerResult(
                        scanner_name=scanner.name,
                        status=ScannerStatus.FAILED,
                        error_message=str(e)
                    )

        return results

    def _run_sequential(
        self,
        scanners: List[BaseScanner],
        url: str
    ) -> Dict[str, ScannerResult]:
        """Run scanners one after another, keyed by ``scanner.name``."""
        return {
            scanner.name: self._run_scanner(scanner, url)
            for scanner in scanners
        }

    def _run_scanner(self, scanner: BaseScanner, url: str) -> ScannerResult:
        """
        Run a single scanner with error handling.

        Returns the scanner's own result, a SKIPPED result when
        ``scanner.is_available()`` is falsy, or a FAILED result carrying the
        exception text when the availability check or the run raises.
        """
        self.logger.info("Running scanner: %s", scanner.name)

        try:
            if not scanner.is_available():
                self.logger.warning("Scanner %s is not available", scanner.name)
                return ScannerResult(
                    scanner_name=scanner.name,
                    status=ScannerStatus.SKIPPED,
                    error_message=f"{scanner.name} service is not available"
                )

            result = scanner.run(url)
            self.logger.info(
                "Scanner %s completed with status: %s", scanner.name, result.status
            )
            return result

        except Exception as e:
            self.logger.exception("Scanner %s failed: %s", scanner.name, e)
            return ScannerResult(
                scanner_name=scanner.name,
                status=ScannerStatus.FAILED,
                error_message=str(e)
            )

    @staticmethod
    def _issue_to_dict(issue: Any) -> Dict[str, Any]:
        """Flatten one scanner issue into a plain dictionary."""
        return {
            'category': issue.category,
            'severity': issue.severity,
            'title': issue.title,
            'description': issue.description,
            'tool': issue.tool,
            'affected_url': issue.affected_url,
            'remediation': issue.remediation,
            'raw_data': issue.raw_data,
        }

    @staticmethod
    def _metric_to_dict(metric: Any) -> Dict[str, Any]:
        """Flatten one scanner metric into a plain dictionary."""
        return {
            'name': metric.name,
            'display_name': metric.display_name,
            'value': metric.value,
            'unit': metric.unit,
            'source': metric.source,
            'score': metric.score,
        }

    def _aggregate_results(
        self,
        results: Dict[str, ScannerResult]
    ) -> Dict[str, Any]:
        """
        Aggregate results from all scanners into one report dictionary.

        Overall status:
        - 'failed'  when no scanner finished with SUCCESS or PARTIAL
          (all failed, all skipped, or nothing ran at all)
        - 'partial' when at least one scanner succeeded and at least one failed
        - 'done'    otherwise
        """
        all_issues: List[Dict[str, Any]] = []
        all_metrics: List[Dict[str, Any]] = []
        all_scores: Dict[str, Any] = {}
        raw_data: Dict[str, Any] = {}
        errors: List[Dict[str, Any]] = []

        successful_scanners = 0
        failed_scanners = 0
        skipped_scanners = 0

        for scanner_name, result in results.items():
            # A PARTIAL scanner still produced usable data, so it counts
            # as successful.
            if result.status in (ScannerStatus.SUCCESS, ScannerStatus.PARTIAL):
                successful_scanners += 1
            elif result.status == ScannerStatus.FAILED:
                failed_scanners += 1
                if result.error_message:
                    errors.append({
                        'scanner': scanner_name,
                        'error': result.error_message
                    })
            elif result.status == ScannerStatus.SKIPPED:
                skipped_scanners += 1

            all_issues.extend(self._issue_to_dict(issue) for issue in result.issues)
            all_metrics.extend(self._metric_to_dict(metric) for metric in result.metrics)

            if result.scores:
                all_scores[scanner_name] = result.scores

            if result.raw_data:
                raw_data[scanner_name] = result.raw_data

        # A run in which nothing succeeded must not be reported as 'done',
        # even if the scanners were merely skipped rather than failed.
        if successful_scanners == 0:
            overall_status = 'failed'
        elif failed_scanners > 0:
            overall_status = 'partial'
        else:
            overall_status = 'done'

        aggregated_scores = self._calculate_aggregated_scores(all_scores)

        return {
            'status': overall_status,
            'scores': aggregated_scores,
            'issues': all_issues,
            'metrics': all_metrics,
            'scanner_results': {
                name: {
                    'status': result.status.value,
                    'error': result.error_message,
                }
                for name, result in results.items()
            },
            'raw_data': raw_data,
            'errors': errors,
            'summary': {
                'total_scanners': len(results),
                'successful': successful_scanners,
                'failed': failed_scanners,
                'skipped': skipped_scanners,
                'total_issues': len(all_issues),
                'total_metrics': len(all_metrics),
            }
        }

    def _calculate_aggregated_scores(
        self,
        scanner_scores: Dict[str, Dict[str, int]]
    ) -> Dict[str, Optional[int]]:
        """
        Calculate aggregated scores from all scanners.

        Only the entry stored under the key 'lighthouse' contributes; each
        score is ``None`` when that entry or the individual score is missing.
        """
        lighthouse_scores = scanner_scores.get('lighthouse', {})

        return {
            'performance': lighthouse_scores.get('performance'),
            'accessibility': lighthouse_scores.get('accessibility'),
            'best_practices': lighthouse_scores.get('best_practices'),
            'seo': lighthouse_scores.get('seo'),
        }
|