"""
Lighthouse Scanner Integration.

This module integrates with Google Lighthouse to measure
performance, accessibility, SEO, and best practices.
"""
|
|
|
|
import logging
import re
from typing import Any, Dict, Optional

import httpx

from django.conf import settings

from .base import (
    BaseScanner,
    ScannerResult,
    ScannerStatus,
    IssueData,
    MetricData,
)
|
|
|
|
# Module-level logger named after this module, per the standard logging convention.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class LighthouseScanner(BaseScanner):
    """
    Scanner that uses Google Lighthouse for performance analysis.

    Communicates with the Lighthouse service container via HTTP API.
    Collects performance metrics, Core Web Vitals, and various audits.
    """

    name = "lighthouse"

    # Timing metrics expected in the service response, as
    # (response key, display name, unit) triples. Cumulative Layout Shift
    # is unitless, hence 'score'.
    METRIC_MAPPINGS = (
        ('firstContentfulPaint', 'First Contentful Paint', 'ms'),
        ('largestContentfulPaint', 'Largest Contentful Paint', 'ms'),
        ('speedIndex', 'Speed Index', 'ms'),
        ('timeToInteractive', 'Time to Interactive', 'ms'),
        ('totalBlockingTime', 'Total Blocking Time', 'ms'),
        ('cumulativeLayoutShift', 'Cumulative Layout Shift', 'score'),
    )

    # camelCase -> snake_case boundary patterns, pre-compiled once instead of
    # on every _to_snake_case call (the originals were rebuilt per call via a
    # function-local `import re`).
    _CAMEL_BOUNDARY_1 = re.compile(r'(.)([A-Z][a-z]+)')
    _CAMEL_BOUNDARY_2 = re.compile(r'([a-z0-9])([A-Z])')

    # Canned remediation advice for well-known Lighthouse audit IDs.
    # Hoisted to a class constant so the dict is not rebuilt per lookup.
    REMEDIATIONS = {
        'first-contentful-paint': (
            "Reduce server response time, eliminate render-blocking resources, "
            "and optimize critical rendering path."
        ),
        'largest-contentful-paint': (
            "Optimize images, preload critical resources, and reduce server "
            "response time."
        ),
        'total-blocking-time': (
            "Reduce JavaScript execution time by breaking up long tasks, "
            "removing unused code, and minimizing main thread work."
        ),
        'cumulative-layout-shift': (
            "Always include size attributes on images and videos, reserve space "
            "for ad slots, and avoid inserting content above existing content."
        ),
        'speed-index': (
            "Minimize main thread work, reduce JavaScript execution time, "
            "and ensure text remains visible during webfont load."
        ),
        'interactive': (
            "Reduce JavaScript payload, defer non-critical scripts, and "
            "minimize main thread work."
        ),
    }

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Initialize the scanner.

        Args:
            config: Optional configuration dict. Recognized keys:
                'service_url' — base URL of the Lighthouse service container
                (default 'http://lighthouse:3001');
                'timeout' — scan timeout in seconds (default 120; Lighthouse
                runs can be slow on heavy pages).
        """
        super().__init__(config)
        self.service_url = self.config.get(
            'service_url',
            'http://lighthouse:3001'
        )
        self.timeout = self.config.get('timeout', 120)

    def is_available(self) -> bool:
        """Check if Lighthouse service is available."""
        try:
            # Short 5s timeout: availability probes should fail fast rather
            # than wait out the full scan timeout.
            with httpx.Client(timeout=5) as client:
                response = client.get(f"{self.service_url}/health")
            return response.status_code == 200
        except Exception as e:
            self.logger.warning(f"Lighthouse service not available: {e}")
            return False

    def run(self, url: str) -> ScannerResult:
        """
        Run Lighthouse scan against the URL.

        Args:
            url: The URL to analyze

        Returns:
            ScannerResult with performance metrics and issues; on timeout,
            HTTP error, or any other failure, an error result built via
            self._create_error_result.
        """
        self.logger.info(f"Starting Lighthouse scan for {url}")

        try:
            with httpx.Client(timeout=self.timeout) as client:
                response = client.post(
                    f"{self.service_url}/scan",
                    json={"url": url}
                )
                response.raise_for_status()
                data = response.json()

            return self._parse_results(url, data)

        except httpx.TimeoutException:
            return self._create_error_result(
                Exception("Lighthouse scan timed out")
            )
        except httpx.HTTPStatusError as e:
            return self._create_error_result(
                Exception(f"Lighthouse service error: {e.response.status_code}")
            )
        except Exception as e:
            return self._create_error_result(e)

    def _parse_results(self, url: str, data: Dict[str, Any]) -> ScannerResult:
        """
        Parse Lighthouse results into ScannerResult format.

        Args:
            url: The scanned URL
            data: Raw Lighthouse response data

        Returns:
            Parsed ScannerResult
        """
        resources = data.get('resources', {})

        metrics = self._build_metrics(data, resources)
        # Audit-derived issues first, then heuristic resource checks, so the
        # issue ordering matches the original single-pass implementation.
        issues = self._build_audit_issues(url, data)
        issues.extend(self._build_resource_issues(url, resources))

        self.logger.info(
            f"Lighthouse scan complete: {len(issues)} issues, {len(metrics)} metrics"
        )

        scores = data.get('scores', {})
        return ScannerResult(
            scanner_name=self.name,
            status=ScannerStatus.SUCCESS,
            issues=issues,
            metrics=metrics,
            scores={
                'performance': scores.get('performance', 0),
                'accessibility': scores.get('accessibility', 0),
                'best_practices': scores.get('bestPractices', 0),
                'seo': scores.get('seo', 0),
            },
            raw_data=data
        )

    def _build_metrics(
        self,
        data: Dict[str, Any],
        resources: Dict[str, Any],
    ) -> list:
        """Extract Core Web Vitals and resource metrics from the response."""
        metrics = []

        raw_metrics = data.get('metrics', {})
        for key, display_name, unit in self.METRIC_MAPPINGS:
            metric_data = raw_metrics.get(key, {})
            # Skip metrics the service omitted or reported without a value.
            if metric_data and metric_data.get('value') is not None:
                metrics.append(MetricData(
                    name=self._to_snake_case(key),
                    display_name=display_name,
                    value=metric_data['value'],
                    unit=unit,
                    source='lighthouse',
                    score=metric_data.get('score')
                ))

        diagnostics = data.get('diagnostics', {})

        # Truthiness checks deliberately skip missing AND zero values,
        # matching the original behavior.
        if resources.get('totalByteWeight'):
            metrics.append(MetricData(
                name='total_byte_weight',
                display_name='Total Page Weight',
                value=resources['totalByteWeight'],
                unit='bytes',
                source='lighthouse'
            ))

        if resources.get('bootupTime'):
            metrics.append(MetricData(
                name='javascript_bootup_time',
                display_name='JavaScript Boot-up Time',
                value=resources['bootupTime'],
                unit='ms',
                source='lighthouse'
            ))

        if diagnostics.get('numRequests'):
            metrics.append(MetricData(
                name='total_requests',
                display_name='Total Network Requests',
                value=float(diagnostics['numRequests']),
                unit='count',
                source='lighthouse'
            ))

        return metrics

    def _build_audit_issues(self, url: str, data: Dict[str, Any]) -> list:
        """Convert failed Lighthouse audits into IssueData entries."""
        issues = []
        for issue in data.get('issues', []):
            severity = self._score_to_severity(issue.get('score', 0.5))
            category = self._map_category(issue.get('category', 'performance'))

            issues.append(IssueData(
                category=category,
                severity=severity,
                title=issue.get('title', 'Unknown issue'),
                description=issue.get('description', ''),
                tool='lighthouse',
                affected_url=url,
                remediation=self._get_remediation(issue.get('id')),
                raw_data=issue
            ))
        return issues

    def _build_resource_issues(self, url: str, resources: Dict[str, Any]) -> list:
        """Flag large bundles, unused JavaScript, and render-blocking resources."""
        issues = []

        # Check for large bundles among the top 5 largest scripts.
        # Threshold lookup hoisted out of the loop (loop-invariant).
        bundle_threshold = settings.SCANNER_CONFIG.get(
            'LARGE_JS_BUNDLE_THRESHOLD_BYTES', 500 * 1024
        )
        for script in resources.get('scriptTreemap', [])[:5]:
            if script.get('resourceBytes', 0) > bundle_threshold:
                issues.append(IssueData(
                    category='resources',
                    severity='medium',
                    title="Large JavaScript bundle detected",
                    description=(
                        f"The script '{script.get('name', 'Unknown')}' "
                        f"is {script['resourceBytes'] / 1024:.1f} KB. "
                        "Large bundles can slow down page load and increase memory usage."
                    ),
                    tool='lighthouse',
                    affected_url=url,
                    remediation=(
                        "Consider code splitting, tree shaking, or lazy loading "
                        "to reduce bundle size."
                    ),
                    raw_data=script
                ))

        # Check for unused JavaScript.
        unused_js = resources.get('unusedJavascript', [])
        if unused_js:
            total_wasted = sum(u.get('wastedBytes', 0) for u in unused_js)
            if total_wasted > 100 * 1024:  # More than 100KB unused
                issues.append(IssueData(
                    category='performance',
                    severity='medium',
                    title="Significant unused JavaScript detected",
                    description=(
                        f"Found {total_wasted / 1024:.1f} KB of unused JavaScript "
                        f"across {len(unused_js)} resources. This increases page "
                        "load time and memory usage."
                    ),
                    tool='lighthouse',
                    affected_url=url,
                    remediation=(
                        "Remove unused code or use code splitting to load "
                        "JavaScript only when needed."
                    ),
                    raw_data={'unused_resources': unused_js}
                ))

        # Check for render-blocking resources.
        blocking = resources.get('renderBlockingResources', [])
        if blocking:
            total_wasted_ms = sum(r.get('wastedMs', 0) for r in blocking)
            if total_wasted_ms > 500:
                issues.append(IssueData(
                    category='performance',
                    severity='medium',
                    title="Render-blocking resources detected",
                    description=(
                        f"Found {len(blocking)} render-blocking resources "
                        f"adding approximately {total_wasted_ms:.0f}ms to page load. "
                        "These resources delay first paint."
                    ),
                    tool='lighthouse',
                    affected_url=url,
                    remediation=(
                        "Consider inlining critical CSS, deferring non-critical JS, "
                        "or using async/defer attributes."
                    ),
                    raw_data={'blocking_resources': blocking}
                ))

        return issues

    def _to_snake_case(self, name: str) -> str:
        """Convert camelCase to snake_case."""
        partial = self._CAMEL_BOUNDARY_1.sub(r'\1_\2', name)
        return self._CAMEL_BOUNDARY_2.sub(r'\1_\2', partial).lower()

    def _score_to_severity(self, score: Optional[float]) -> str:
        """Convert Lighthouse score (0..1, or None for non-scored audits) to severity."""
        if score is None:
            return 'info'
        if score < 0.25:
            return 'high'
        if score < 0.5:
            return 'medium'
        if score < 0.75:
            return 'low'
        return 'info'

    def _map_category(self, lighthouse_category: str) -> str:
        """Map Lighthouse category to our category (defaults to 'performance')."""
        mapping = {
            'performance': 'performance',
            'accessibility': 'accessibility',
            'best-practices': 'best_practices',
            'seo': 'seo',
        }
        return mapping.get(lighthouse_category, 'performance')

    def _get_remediation(self, audit_id: Optional[str]) -> str:
        """Get remediation text for known audit IDs, with a generic fallback."""
        return self.REMEDIATIONS.get(
            audit_id, "Review and optimize based on the audit details."
        )
|