"""
|
|
Playwright Browser Scanner.
|
|
|
|
This module uses Playwright to perform browser-based analysis,
|
|
including console error capture, resource loading, and basic
|
|
memory usage indicators.
|
|
"""
|
|
|
|
import time
|
|
import logging
|
|
import asyncio
|
|
from typing import Dict, List, Optional, Tuple
|
|
|
|
from django.conf import settings
|
|
|
|
from .base import BaseScanner, ScannerResult, ScannerStatus
|
|
|
|
logger = logging.getLogger('scanner')
|
|
|
|
|
|
class PlaywrightScanner(BaseScanner):
    """
    Browser-based scanner using Playwright.

    Captures:
    - Console errors and warnings
    - Network request metrics
    - Large images and resources
    - JavaScript errors
    - Memory usage indicators
    - Page load timing
    """

    name = "playwright"

    def __init__(self, config: Optional[dict] = None):
        super().__init__(config)
        self.timeout = self.config.get(
            'timeout',
            settings.SCANNER_CONFIG.get('PLAYWRIGHT_TIMEOUT', 30000)
        )
        self.viewport = self.config.get(
            'viewport',
            settings.SCANNER_CONFIG.get('PLAYWRIGHT_VIEWPORT', {'width': 1920, 'height': 1080})
        )
        self.large_image_threshold = settings.SCANNER_CONFIG.get(
            'LARGE_IMAGE_THRESHOLD_BYTES', 1024 * 1024
        )

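    # Illustrative sketch only: given the keys read in __init__ above, the Django
    # SCANNER_CONFIG setting presumably has roughly this shape (the values shown
    # are just the fallback defaults used above, not the project's actual
    # configuration):
    #
    #     SCANNER_CONFIG = {
    #         'PLAYWRIGHT_TIMEOUT': 30000,  # ms
    #         'PLAYWRIGHT_VIEWPORT': {'width': 1920, 'height': 1080},
    #         'LARGE_IMAGE_THRESHOLD_BYTES': 1024 * 1024,
    #     }
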
    def is_available(self) -> bool:
        """Check if Playwright is available."""
        try:
            from playwright.sync_api import sync_playwright
            return True
        except ImportError:
            self.logger.warning("Playwright not installed")
            return False

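    # Note: the playwright import in is_available() is local to the method, which
    # keeps the dependency optional; the module can still be imported (e.g. by a
    # scanner registry) when the playwright package is not installed.
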
    def run(self, url: str) -> ScannerResult:
        """
        Run browser-based analysis using Playwright.

        Args:
            url: The URL to analyze

        Returns:
            ScannerResult with browser analysis data
        """
        start_time = time.time()

        if not self.is_available():
            return ScannerResult(
                status=ScannerStatus.FAILED,
                scanner_name=self.name,
                error_message="Playwright is not available",
                execution_time_seconds=time.time() - start_time
            )

        try:
            from playwright.sync_api import sync_playwright

            with sync_playwright() as p:
                # Launch browser
                browser = p.chromium.launch(
                    headless=True,
                    args=[
                        '--no-sandbox',
                        '--disable-dev-shm-usage',
                        '--disable-gpu',
                        '--disable-extensions',
                    ]
                )

                context = browser.new_context(
                    viewport=self.viewport,
                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                )

                page = context.new_page()

                # Collect data
                console_messages = []
                network_requests = []
                failed_requests = []
                js_errors = []

                # Console message handler
                def handle_console(msg):
                    console_messages.append({
                        'type': msg.type,
                        'text': msg.text[:500],  # Truncate long messages
                        'location': str(msg.location) if hasattr(msg, 'location') else None
                    })

                # Request handler
                def handle_request(request):
                    network_requests.append({
                        'url': request.url[:200],
                        'method': request.method,
                        'resource_type': request.resource_type,
                    })

                # Response handler
                def handle_response(response):
                    # Find the corresponding request
                    for req in network_requests:
                        if req['url'] == response.url[:200]:
                            req['status'] = response.status
                            try:
                                headers = response.headers
                                content_length = headers.get('content-length', '0')
                                req['size'] = int(content_length) if content_length else 0
                            except (TypeError, ValueError):
                                req['size'] = 0
                            break

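                # Sizes here are best-effort: responses without a Content-Length header
                # (e.g. chunked or streamed responses) fall back to 0, and matching by
                # truncated URL attaches the size to the first request with that URL.
                # If exact sizes were needed, response.body() could be used instead,
                # at the cost of buffering every response.
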
                # Request failed handler
                def handle_request_failed(request):
                    failed_requests.append({
                        'url': request.url[:200],
                        'failure': request.failure,
                        'resource_type': request.resource_type,
                    })

                # Page error handler
                def handle_page_error(error):
                    js_errors.append({
                        'message': str(error)[:500],
                    })

                # Attach handlers
                page.on('console', handle_console)
                page.on('request', handle_request)
                page.on('response', handle_response)
                page.on('requestfailed', handle_request_failed)
                page.on('pageerror', handle_page_error)

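                # The handlers above are attached before navigation so that requests
                # and console output emitted during the initial page load are captured.
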
                # Navigate to page
                load_start = time.time()

                try:
                    page.goto(url, timeout=self.timeout, wait_until='networkidle')
                except Exception as e:
                    # Try with a less strict wait condition
                    self.logger.warning(f"Network idle timeout, trying load: {e}")
                    page.goto(url, timeout=self.timeout, wait_until='load')

                load_time = (time.time() - load_start) * 1000  # ms

                # Wait a bit more for any async content
                page.wait_for_timeout(2000)

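                # 'networkidle' waits until there have been no network connections for
                # roughly 500 ms, so it can time out on pages that poll continuously;
                # the fallback to wait_until='load' above keeps such pages scannable.
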
                # Get performance metrics if available
                performance_data = page.evaluate('''() => {
                    const timing = performance.timing;
                    const memory = performance.memory || {};
                    return {
                        domContentLoaded: timing.domContentLoadedEventEnd - timing.navigationStart,
                        loadComplete: timing.loadEventEnd - timing.navigationStart,
                        domInteractive: timing.domInteractive - timing.navigationStart,
                        firstPaint: performance.getEntriesByType('paint').find(p => p.name === 'first-paint')?.startTime || null,
                        firstContentfulPaint: performance.getEntriesByType('paint').find(p => p.name === 'first-contentful-paint')?.startTime || null,
                        jsHeapSizeLimit: memory.jsHeapSizeLimit || null,
                        totalJSHeapSize: memory.totalJSHeapSize || null,
                        usedJSHeapSize: memory.usedJSHeapSize || null,
                    };
                }''')

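                # Caveat: performance.memory is a non-standard, Chromium-only API and
                # performance.timing is deprecated in favour of Navigation Timing
                # Level 2, but both are available in the headless Chromium launched
                # above; where performance.memory is missing, the heap fields simply
                # come back as null.
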
                # Close browser
                browser.close()

                # Process results
                metrics = self._extract_metrics(
                    load_time,
                    performance_data,
                    network_requests
                )

                issues = self._extract_issues(
                    console_messages,
                    network_requests,
                    failed_requests,
                    js_errors,
                    performance_data
                )

                raw_data = {
                    'console_messages': console_messages[:50],  # Limit size
                    'network_requests': network_requests[:100],
                    'failed_requests': failed_requests,
                    'js_errors': js_errors,
                    'performance': performance_data,
                    'load_time_ms': load_time,
                }

                execution_time = time.time() - start_time

                return ScannerResult(
                    status=ScannerStatus.SUCCESS,
                    scanner_name=self.name,
                    metrics=metrics,
                    issues=issues,
                    raw_data=raw_data,
                    execution_time_seconds=execution_time
                )

        except Exception as e:
            logger.exception(f"Playwright scan failed for {url}")
            return ScannerResult(
                status=ScannerStatus.FAILED,
                scanner_name=self.name,
                error_message=f"Browser scan failed: {e}",
                execution_time_seconds=time.time() - start_time
            )

    def _extract_metrics(
        self,
        load_time: float,
        performance_data: dict,
        network_requests: list
    ) -> list:
        """Extract metrics from browser data."""
        metrics = []

        # Page load time
        metrics.append(self._create_metric(
            name='page_load_time',
            display_name='Page Load Time',
            value=load_time,
            unit='ms'
        ))

        # DOM Content Loaded
        if performance_data.get('domContentLoaded'):
            metrics.append(self._create_metric(
                name='dom_content_loaded',
                display_name='DOM Content Loaded',
                value=performance_data['domContentLoaded'],
                unit='ms'
            ))

        # DOM Interactive
        if performance_data.get('domInteractive'):
            metrics.append(self._create_metric(
                name='dom_interactive',
                display_name='DOM Interactive',
                value=performance_data['domInteractive'],
                unit='ms'
            ))

        # Network metrics
        total_requests = len(network_requests)
        total_size = sum(r.get('size', 0) for r in network_requests)

        metrics.append(self._create_metric(
            name='total_requests_playwright',
            display_name='Total Network Requests',
            value=total_requests,
            unit='count'
        ))

        metrics.append(self._create_metric(
            name='total_download_size',
            display_name='Total Downloaded',
            value=total_size,
            unit='bytes'
        ))

        # Request type breakdown
        scripts = [r for r in network_requests if r.get('resource_type') == 'script']
        stylesheets = [r for r in network_requests if r.get('resource_type') == 'stylesheet']
        images = [r for r in network_requests if r.get('resource_type') == 'image']
        fonts = [r for r in network_requests if r.get('resource_type') == 'font']

        metrics.append(self._create_metric(
            name='script_requests',
            display_name='Script Requests',
            value=len(scripts),
            unit='count'
        ))

        metrics.append(self._create_metric(
            name='image_requests',
            display_name='Image Requests',
            value=len(images),
            unit='count'
        ))

        # Memory metrics
        if performance_data.get('usedJSHeapSize'):
            metrics.append(self._create_metric(
                name='js_heap_used',
                display_name='JS Heap Used',
                value=performance_data['usedJSHeapSize'],
                unit='bytes'
            ))

        if performance_data.get('totalJSHeapSize'):
            metrics.append(self._create_metric(
                name='js_heap_total',
                display_name='JS Heap Total',
                value=performance_data['totalJSHeapSize'],
                unit='bytes'
            ))

        return metrics

    def _extract_issues(
        self,
        console_messages: list,
        network_requests: list,
        failed_requests: list,
        js_errors: list,
        performance_data: dict
    ) -> list:
        """Extract issues from browser data."""
        issues = []

        # Console errors
        errors = [m for m in console_messages if m.get('type') == 'error']
        if errors:
            issues.append(self._create_issue(
                category='content',
                severity='medium',
                title=f'{len(errors)} console error(s) detected',
                description='JavaScript console errors were detected on the page.',
                remediation='Review and fix JavaScript errors to improve user experience.',
                raw_data={'errors': errors[:10]}
            ))

        # Console warnings
        warnings = [m for m in console_messages if m.get('type') == 'warning']
        if len(warnings) > 5:
            issues.append(self._create_issue(
                category='content',
                severity='low',
                title=f'{len(warnings)} console warning(s) detected',
                description='Multiple JavaScript warnings were detected on the page.',
                remediation='Review console warnings for potential issues.',
                raw_data={'warnings': warnings[:10]}
            ))

        # JavaScript page errors
        if js_errors:
            issues.append(self._create_issue(
                category='content',
                severity='high',
                title=f'{len(js_errors)} JavaScript error(s) detected',
                description='Uncaught JavaScript exceptions were detected.',
                remediation='Fix JavaScript errors that could break page functionality.',
                raw_data={'errors': js_errors}
            ))

        # Failed network requests
        if failed_requests:
            issues.append(self._create_issue(
                category='content',
                severity='medium',
                title=f'{len(failed_requests)} failed network request(s)',
                description='Some resources failed to load.',
                remediation='Ensure all resources are available and URLs are correct.',
                raw_data={'failed': failed_requests}
            ))

        # Large images (threshold is configurable via LARGE_IMAGE_THRESHOLD_BYTES)
        threshold_mb = self.large_image_threshold / (1024 * 1024)
        large_images = [
            r for r in network_requests
            if r.get('resource_type') == 'image' and r.get('size', 0) > self.large_image_threshold
        ]
        if large_images:
            issues.append(self._create_issue(
                category='resources',
                severity='medium',
                title=f'{len(large_images)} large image(s) detected (>{threshold_mb:g}MB)',
                description='Large images slow down page load and increase bandwidth usage.',
                remediation='Compress images and use modern formats like WebP or AVIF.',
                raw_data={'images': [{'url': i['url'], 'size': i.get('size')} for i in large_images]}
            ))

        # Too many requests
        if len(network_requests) > 100:
            issues.append(self._create_issue(
                category='performance',
                severity='medium',
                title='High number of network requests',
                description=f'Page makes {len(network_requests)} network requests, which can slow loading.',
                remediation='Combine files, use sprites, and reduce third-party scripts.'
            ))

        # High memory usage (potential memory issues)
        used_heap = performance_data.get('usedJSHeapSize') or 0   # null-safe
        total_heap = performance_data.get('totalJSHeapSize') or 0  # null-safe

        if used_heap > 100 * 1024 * 1024:  # > 100MB
            issues.append(self._create_issue(
                category='resources',
                severity='medium',
                title='High JavaScript memory usage',
                description=f'Page uses {used_heap / (1024 * 1024):.1f}MB of JavaScript heap memory.',
                remediation='Review for memory leaks and optimize JavaScript memory usage.'
            ))

        if total_heap > 0 and used_heap / total_heap > 0.9:
            issues.append(self._create_issue(
                category='resources',
                severity='high',
                title='JavaScript heap near capacity',
                description='JavaScript heap is using >90% of available memory, risking out-of-memory errors.',
                remediation='Investigate potential memory leaks and reduce memory consumption.'
            ))

        return issues
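

# Minimal manual-test sketch (not part of the scanner pipeline). Assumptions:
# DJANGO_SETTINGS_MODULE points at a settings module that defines SCANNER_CONFIG,
# the file is executed as a module (python -m <package>.playwright) so the
# relative import of .base resolves, and ScannerResult exposes its constructor
# arguments as attributes; the URL below is illustrative only.
if __name__ == '__main__':
    import django

    django.setup()

    scanner = PlaywrightScanner(config={'timeout': 15000})
    result = scanner.run('https://example.com')
    print(result.status, f'{result.execution_time_seconds:.1f}s')
    for issue in result.issues:
        print(issue)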