"""
|
|
Lighthouse Scanner Integration.
|
|
|
|
This module integrates with the Lighthouse scanner service
|
|
to perform performance, accessibility, SEO, and best practices audits.
|
|
"""

import time
import logging
from typing import Optional

import httpx

from django.conf import settings

from .base import BaseScanner, ScannerResult, ScannerStatus

logger = logging.getLogger('scanner')


class LighthouseScanner(BaseScanner):
    """
    Scanner that integrates with the Lighthouse service.

    Lighthouse audits:
    - Performance (FCP, LCP, TTI, TBT, CLS, Speed Index)
    - Accessibility
    - Best Practices
    - SEO
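
    Example (a sketch, not a guaranteed API; assumes Django settings are
    configured and the Lighthouse service is reachable):

        scanner = LighthouseScanner({'timeout': 60})
        result = scanner.run('https://example.com')
        if result.status == ScannerStatus.SUCCESS:
            print(result.scores['performance'])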
    """

    name = "lighthouse"

    def __init__(self, config: Optional[dict] = None):
        super().__init__(config)
        self.service_url = self.config.get(
            'lighthouse_url',
            settings.SCANNER_CONFIG.get('LIGHTHOUSE_URL', 'http://lighthouse:3001')
        )
        self.timeout = self.config.get('timeout', 120)

    def is_available(self) -> bool:
        """Check if Lighthouse service is available."""
        try:
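            # Liveness probe; short timeout so an unavailable service fails fast.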
            response = httpx.get(
                f"{self.service_url}/health",
                timeout=5
            )
            return response.status_code == 200
        except Exception as e:
            self.logger.warning(f"Lighthouse service not available: {e}")
            return False

    def run(self, url: str) -> ScannerResult:
        """
        Run Lighthouse audit on the given URL.

        Args:
            url: The URL to audit

        Returns:
            ScannerResult with Lighthouse data
        """
        start_time = time.time()

        if not self.is_available():
            return ScannerResult(
                status=ScannerStatus.FAILED,
                scanner_name=self.name,
                error_message="Lighthouse service is not available",
                execution_time_seconds=time.time() - start_time
            )

        try:
            # Call Lighthouse service
            response = httpx.post(
                f"{self.service_url}/scan",
                json={"url": url},
                timeout=self.timeout
            )

            if response.status_code != 200:
                return ScannerResult(
                    status=ScannerStatus.FAILED,
                    scanner_name=self.name,
                    error_message=f"Lighthouse returned status {response.status_code}: {response.text}",
                    execution_time_seconds=time.time() - start_time
                )

            data = response.json()
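            # Assumed response shape, inferred from the fields consumed below
            # (this client is the only contract we have for the service):
            #   {"scores": {"performance", "accessibility", "bestPractices", "seo"},
            #    "metrics": {<lighthouseKey>: {"value", "score"}, ...},
            #    "resources": {...}, "diagnostics": {...}, "issues": [...]}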

            # Extract scores
            scores = {
                'performance': data.get('scores', {}).get('performance'),
                'accessibility': data.get('scores', {}).get('accessibility'),
                'best_practices': data.get('scores', {}).get('bestPractices'),
                'seo': data.get('scores', {}).get('seo'),
            }

            # Extract metrics
            metrics = self._extract_metrics(data)

            # Extract issues
            issues = self._extract_issues(data)

            execution_time = time.time() - start_time

            return ScannerResult(
                status=ScannerStatus.SUCCESS,
                scanner_name=self.name,
                scores=scores,
                metrics=metrics,
                issues=issues,
                raw_data=data,
                execution_time_seconds=execution_time
            )

        except httpx.TimeoutException:
            return ScannerResult(
                status=ScannerStatus.FAILED,
                scanner_name=self.name,
                error_message="Lighthouse scan timed out",
                execution_time_seconds=time.time() - start_time
            )
        except httpx.RequestError as e:
            return ScannerResult(
                status=ScannerStatus.FAILED,
                scanner_name=self.name,
                error_message=f"Lighthouse request failed: {e}",
                execution_time_seconds=time.time() - start_time
            )
        except Exception as e:
            logger.exception(f"Lighthouse scan failed for {url}")
            return ScannerResult(
                status=ScannerStatus.FAILED,
                scanner_name=self.name,
                error_message=f"Unexpected error: {e}",
                execution_time_seconds=time.time() - start_time
            )

    def _extract_metrics(self, data: dict) -> list:
        """Extract key metrics from Lighthouse data."""
        metrics = []

        # Core Web Vitals and performance metrics
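        # Each entry maps our metric name to (display name, key in the
        # service's 'metrics' payload, unit); CLS is unitless, so it is
        # recorded with unit 'score'.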
        metrics_config = {
            'first_contentful_paint': ('First Contentful Paint', 'firstContentfulPaint', 'ms'),
            'largest_contentful_paint': ('Largest Contentful Paint', 'largestContentfulPaint', 'ms'),
            'speed_index': ('Speed Index', 'speedIndex', 'ms'),
            'time_to_interactive': ('Time to Interactive', 'timeToInteractive', 'ms'),
            'total_blocking_time': ('Total Blocking Time', 'totalBlockingTime', 'ms'),
            'cumulative_layout_shift': ('Cumulative Layout Shift', 'cumulativeLayoutShift', 'score'),
        }

        lh_metrics = data.get('metrics', {})

        for metric_name, (display_name, lh_key, unit) in metrics_config.items():
            metric_data = lh_metrics.get(lh_key, {})
            if metric_data and metric_data.get('value') is not None:
                metrics.append(self._create_metric(
                    name=metric_name,
                    display_name=display_name,
                    value=metric_data['value'],
                    unit=unit,
                    score=metric_data.get('score')
                ))

        # Resource metrics
        resources = data.get('resources', {})
        diagnostics = data.get('diagnostics', {})

        if resources.get('totalByteWeight'):
            metrics.append(self._create_metric(
                name='total_byte_weight',
                display_name='Total Page Weight',
                value=resources['totalByteWeight'],
                unit='bytes'
            ))

        if diagnostics.get('numRequests'):
            metrics.append(self._create_metric(
                name='num_requests',
                display_name='Total Requests',
                value=diagnostics['numRequests'],
                unit='count'
            ))

        if diagnostics.get('numScripts'):
            metrics.append(self._create_metric(
                name='num_scripts',
                display_name='JavaScript Files',
                value=diagnostics['numScripts'],
                unit='count'
            ))

        if diagnostics.get('totalTransferSize'):
            metrics.append(self._create_metric(
                name='total_transfer_size',
                display_name='Total Transfer Size',
                value=diagnostics['totalTransferSize'],
                unit='bytes'
            ))

        return metrics

    def _extract_issues(self, data: dict) -> list:
        """Extract issues from Lighthouse audit results."""
        issues = []

        # Convert Lighthouse issues to our format
        lh_issues = data.get('issues', [])

        # Map Lighthouse categories to our categories
        category_map = {
            'performance': 'performance',
            'accessibility': 'accessibility',
            'best-practices': 'best_practices',
            'seo': 'seo',
        }

        for lh_issue in lh_issues:
            # Determine severity based on score and impact
            score = lh_issue.get('score') or 0  # guard against null scores
            impact = lh_issue.get('impact') or 0
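
            # Heuristic mapping (an interpretation of this service's fields,
            # not a Lighthouse standard): audit scores run 0-1 with 1 passing,
            # and 'impact' is assumed to estimate user-facing cost; null
            # scores are coerced to 0 above. A hard failure (score 0) with
            # high impact is 'high'; partial failures rank 'medium' or 'low'
            # by impact; passing audits are reported as 'info'.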
            if score == 0 and impact > 5:
                severity = 'high'
            elif score < 0.5 and impact > 3:
                severity = 'medium'
            elif score < 0.5:
                severity = 'low'
            else:
                severity = 'info'
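
            # Categories outside the map fall back to 'performance'.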
            category = category_map.get(lh_issue.get('category'), 'performance')

            issues.append(self._create_issue(
                category=category,
                severity=severity,
                title=lh_issue.get('title', 'Unknown issue'),
                description=lh_issue.get('description', ''),
                raw_data={
                    'id': lh_issue.get('id'),
                    'displayValue': lh_issue.get('displayValue'),
                    'score': score,
                    'impact': impact,
                }
            ))

        # Check for unused resources
        resources = data.get('resources', {})

        # Unused JavaScript
        unused_js = resources.get('unusedJavascript', [])
        for item in unused_js[:5]:  # Top 5
            if item.get('wastedBytes', 0) > 50000:  # > 50KB wasted
                issues.append(self._create_issue(
                    category='performance',
                    severity='medium',
                    title='Unused JavaScript',
                    description=f"Remove unused JavaScript to reduce payload. {item.get('url', '')} has {item.get('wastedBytes', 0) / 1024:.1f}KB unused.",
                    remediation='Remove unused JavaScript code or use code splitting to load only what is needed.',
                    raw_data=item
                ))

        # Unused CSS
        unused_css = resources.get('unusedCss', [])
        for item in unused_css[:5]:
            if item.get('wastedBytes', 0) > 20000:  # > 20KB wasted
                issues.append(self._create_issue(
                    category='performance',
                    severity='low',
                    title='Unused CSS',
                    description=f"Remove unused CSS rules. {item.get('url', '')} has {item.get('wastedBytes', 0) / 1024:.1f}KB unused.",
                    remediation='Use tools like PurgeCSS to remove unused CSS.',
                    raw_data=item
                ))

        # Render-blocking resources
        blocking = resources.get('renderBlockingResources', [])
        if len(blocking) > 3:
            issues.append(self._create_issue(
                category='performance',
                severity='medium',
                title='Multiple render-blocking resources',
                description=f'Found {len(blocking)} render-blocking resources that delay page rendering.',
                remediation='Defer non-critical JavaScript and inline critical CSS.',
                raw_data={'resources': blocking[:10]}
            ))

        # Large JavaScript bundles
        large_scripts = resources.get('scriptTreemap', [])
        for script in large_scripts[:5]:
            if script.get('resourceBytes', 0) > 500000:  # > 500KB
                issues.append(self._create_issue(
                    category='resources',
                    severity='medium',
                    title='Large JavaScript bundle',
                    description=f"Large script bundle detected: {script.get('name', 'Unknown')} ({script.get('resourceBytes', 0) / 1024:.1f}KB)",
                    remediation='Consider code splitting and lazy loading to reduce bundle size.',
                    raw_data=script
                ))

        # Third-party impact
        third_party = resources.get('thirdPartySummary', [])
        high_impact_third_party = [
            tp for tp in third_party
            if tp.get('blockingTime', 0) > 500  # > 500ms blocking
        ]
        if high_impact_third_party:
            issues.append(self._create_issue(
                category='performance',
                severity='medium',
                title='Third-party scripts impacting performance',
                description=f'{len(high_impact_third_party)} third-party scripts are significantly impacting page load time.',
                remediation='Consider lazy loading third-party scripts or using async/defer attributes.',
                raw_data={'third_parties': high_impact_third_party}
            ))

        return issues