Initial commit: Lighthouse scanner service

commit 90ad47a721
@@ -0,0 +1,27 @@
# Django Core Settings
DEBUG=True
SECRET_KEY=your-secret-key-change-in-production-abc123xyz789
ALLOWED_HOSTS=localhost,127.0.0.1,web

# Database
DATABASE_URL=postgres://analyzer:analyzer_password@db:5432/website_analyzer

# Redis & Celery
REDIS_URL=redis://redis:6379/0
CELERY_BROKER_URL=redis://redis:6379/0
CELERY_RESULT_BACKEND=redis://redis:6379/1

# OWASP ZAP Configuration
ZAP_API_KEY=zap-api-key-change-me
ZAP_HOST=http://zap:8080

# Lighthouse Configuration
LIGHTHOUSE_CHROME_FLAGS=--headless --no-sandbox --disable-gpu

# Scan Settings
MAX_SCAN_TIME_SECONDS=300
SCAN_RATE_LIMIT_MINUTES=5
MAX_CONCURRENT_SCANS=3

# Security
CORS_ALLOWED_ORIGINS=http://localhost:3000,http://localhost:8000
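The SECRET_KEY above is only a placeholder. As a minimal sketch (assuming Django is installed in the local environment), a real value can be generated with Django's own helper and pasted into the file before deployment:

```python
# Generate a production-grade SECRET_KEY to replace the placeholder above.
from django.core.management.utils import get_random_secret_key

print(get_random_secret_key())  # copy the output into SECRET_KEY in .env
```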
@@ -0,0 +1,84 @@
# Website Analyzer Backend - Dockerfile
# Multi-stage build for efficient image size

FROM python:3.11-slim as builder

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    libpq-dev \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies
COPY requirements.txt .
RUN pip install --user -r requirements.txt

# Install Playwright and its dependencies
RUN pip install --user playwright && \
    python -m playwright install chromium && \
    python -m playwright install-deps chromium

# ==========================================================================
# Production Stage
# ==========================================================================
FROM python:3.11-slim

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PATH="/root/.local/bin:$PATH"

WORKDIR /app

# Install runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    libpq5 \
    curl \
    # Playwright/Chromium dependencies
    libnss3 \
    libnspr4 \
    libatk1.0-0 \
    libatk-bridge2.0-0 \
    libcups2 \
    libdrm2 \
    libdbus-1-3 \
    libxkbcommon0 \
    libxcomposite1 \
    libxdamage1 \
    libxfixes3 \
    libxrandr2 \
    libgbm1 \
    libasound2 \
    libpango-1.0-0 \
    libcairo2 \
    libatspi2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Copy Python packages from builder
COPY --from=builder /root/.local /root/.local
COPY --from=builder /root/.cache/ms-playwright /root/.cache/ms-playwright

# Copy application code
COPY . .

# Create logs directory
RUN mkdir -p logs staticfiles

# Create non-root user for security
RUN useradd -m -u 1000 appuser && \
    chown -R appuser:appuser /app /root/.local /root/.cache
USER appuser

# Expose port
EXPOSE 8000

# Default command
CMD ["gunicorn", "core.wsgi:application", "--bind", "0.0.0.0:8000", "--workers", "4"]
@@ -0,0 +1,5 @@
"""
API app initialization.
"""

default_app_config = 'api.apps.ApiConfig'
@@ -0,0 +1,11 @@
"""
API app configuration.
"""

from django.apps import AppConfig


class ApiConfig(AppConfig):
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'api'
    verbose_name = 'REST API'
@@ -0,0 +1,52 @@
"""
Custom exception handler for DRF.
"""

from rest_framework.views import exception_handler
from rest_framework.response import Response
from rest_framework import status
import logging

logger = logging.getLogger(__name__)


def custom_exception_handler(exc, context):
    """
    Custom exception handler that provides consistent error responses.

    Handles common exceptions and formats them consistently.
    """
    # Call REST framework's default exception handler first
    response = exception_handler(exc, context)

    if response is not None:
        # Customize the response data
        custom_response_data = {
            'error': True,
            'status_code': response.status_code,
        }

        if isinstance(response.data, dict):
            if 'detail' in response.data:
                custom_response_data['message'] = str(response.data['detail'])
            else:
                custom_response_data['errors'] = response.data
        elif isinstance(response.data, list):
            custom_response_data['errors'] = response.data
        else:
            custom_response_data['message'] = str(response.data)

        response.data = custom_response_data
        return response

    # Handle unexpected exceptions
    logger.exception(f"Unhandled exception: {exc}")

    return Response(
        {
            'error': True,
            'status_code': 500,
            'message': 'An unexpected error occurred',
        },
        status=status.HTTP_500_INTERNAL_SERVER_ERROR
    )
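For illustration, a sketch of the envelopes this handler produces; the exact message strings come from DRF's default exception details and are assumed here, not taken from the commit:

```python
# Hypothetical examples of response bodies returned by custom_exception_handler.

# A DRF NotFound (HTTP 404) collapses to a single message:
not_found_body = {
    'error': True,
    'status_code': 404,
    'message': 'Not found.',
}

# A field validation error (HTTP 400) keeps per-field details under 'errors':
validation_body = {
    'error': True,
    'status_code': 400,
    'errors': {'url': ['Enter a valid URL.']},
}
```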
@@ -0,0 +1,243 @@
"""
DRF Serializers for the API.

This module defines serializers for converting model instances
to JSON and validating input data.
"""

from rest_framework import serializers
from websites.models import Website, Scan, Issue, Metric, ScanStatus


class IssueSerializer(serializers.ModelSerializer):
    """Serializer for Issue model."""

    severity_display = serializers.CharField(source='get_severity_display', read_only=True)
    category_display = serializers.CharField(source='get_category_display', read_only=True)
    tool_display = serializers.CharField(source='get_tool_display', read_only=True)

    class Meta:
        model = Issue
        fields = [
            'id',
            'category',
            'category_display',
            'severity',
            'severity_display',
            'tool',
            'tool_display',
            'title',
            'description',
            'affected_url',
            'remediation',
            'created_at',
        ]
        read_only_fields = fields


class MetricSerializer(serializers.ModelSerializer):
    """Serializer for Metric model."""

    formatted_value = serializers.CharField(source='get_formatted_value', read_only=True)
    unit_display = serializers.CharField(source='get_unit_display', read_only=True)

    class Meta:
        model = Metric
        fields = [
            'id',
            'name',
            'display_name',
            'value',
            'unit',
            'unit_display',
            'formatted_value',
            'source',
            'score',
        ]
        read_only_fields = fields


class ScanListSerializer(serializers.ModelSerializer):
    """Serializer for Scan list views (minimal data)."""

    status_display = serializers.CharField(source='get_status_display', read_only=True)
    website_url = serializers.CharField(source='website.url', read_only=True)
    issues_count = serializers.SerializerMethodField()

    class Meta:
        model = Scan
        fields = [
            'id',
            'website_url',
            'status',
            'status_display',
            'created_at',
            'completed_at',
            'overall_score',
            'performance_score',
            'security_score',
            'issues_count',
        ]
        read_only_fields = fields

    def get_issues_count(self, obj):
        return obj.issues.count()


class ScanDetailSerializer(serializers.ModelSerializer):
    """Serializer for Scan detail views (full data)."""

    status_display = serializers.CharField(source='get_status_display', read_only=True)
    website_url = serializers.CharField(source='website.url', read_only=True)
    website_domain = serializers.CharField(source='website.domain', read_only=True)
    issues = IssueSerializer(many=True, read_only=True)
    metrics = MetricSerializer(many=True, read_only=True)
    issues_by_category = serializers.SerializerMethodField()
    issues_by_severity = serializers.SerializerMethodField()

    class Meta:
        model = Scan
        fields = [
            'id',
            'website_url',
            'website_domain',
            'status',
            'status_display',
            'created_at',
            'started_at',
            'completed_at',
            'overall_score',
            'performance_score',
            'accessibility_score',
            'seo_score',
            'best_practices_score',
            'security_score',
            'error_message',
            'issues',
            'metrics',
            'issues_by_category',
            'issues_by_severity',
        ]
        read_only_fields = fields

    def get_issues_by_category(self, obj):
        """Group issues by category."""
        from collections import defaultdict
        grouped = defaultdict(list)

        for issue in obj.issues.all():
            grouped[issue.category].append(IssueSerializer(issue).data)

        return dict(grouped)

    def get_issues_by_severity(self, obj):
        """Count issues by severity."""
        from django.db.models import Count

        counts = obj.issues.values('severity').annotate(count=Count('id'))
        return {item['severity']: item['count'] for item in counts}


class ScanCreateSerializer(serializers.Serializer):
    """Serializer for creating new scans."""

    url = serializers.URLField(
        required=True,
        help_text="The URL to scan (must be http or https)"
    )

    def validate_url(self, value):
        """Validate and normalize the URL."""
        from scanner.utils import validate_url

        is_valid, result = validate_url(value)

        if not is_valid:
            raise serializers.ValidationError(result)

        return result  # Return normalized URL

    def create(self, validated_data):
        """Create Website and Scan records."""
        from scanner.tasks import check_rate_limit, check_concurrent_scan_limit, run_scan_task

        url = validated_data['url']

        # Check rate limit
        rate_limit_error = check_rate_limit(url)
        if rate_limit_error:
            raise serializers.ValidationError({'url': rate_limit_error})

        # Check concurrent scan limit
        concurrent_error = check_concurrent_scan_limit()
        if concurrent_error:
            raise serializers.ValidationError({'non_field_errors': concurrent_error})

        # Get or create Website
        website, created = Website.objects.get_or_create(
            url=url,
            defaults={'domain': validated_data.get('domain', '')}
        )

        # Create Scan
        scan = Scan.objects.create(
            website=website,
            status=ScanStatus.PENDING
        )

        # Trigger Celery task
        task = run_scan_task.delay(str(scan.id))

        # Update scan with task ID
        scan.celery_task_id = task.id
        scan.save(update_fields=['celery_task_id'])

        return scan


class WebsiteSerializer(serializers.ModelSerializer):
    """Serializer for Website model."""

    scans_count = serializers.SerializerMethodField()
    latest_scan = serializers.SerializerMethodField()

    class Meta:
        model = Website
        fields = [
            'id',
            'url',
            'domain',
            'created_at',
            'last_scanned_at',
            'scans_count',
            'latest_scan',
        ]
        read_only_fields = fields

    def get_scans_count(self, obj):
        return obj.scans.count()

    def get_latest_scan(self, obj):
        latest = obj.scans.first()
        if latest:
            return ScanListSerializer(latest).data
        return None


class WebsiteDetailSerializer(WebsiteSerializer):
    """Detailed Website serializer with scan list."""

    scans = ScanListSerializer(many=True, read_only=True)

    class Meta(WebsiteSerializer.Meta):
        fields = WebsiteSerializer.Meta.fields + ['scans']


class HealthCheckSerializer(serializers.Serializer):
    """Serializer for health check response."""

    status = serializers.CharField()
    database = serializers.CharField()
    redis = serializers.CharField()
    celery = serializers.CharField()
    timestamp = serializers.DateTimeField()
@@ -0,0 +1,18 @@
"""
URL routing for the API.
"""

from django.urls import path, include
from rest_framework.routers import DefaultRouter
from . import views

router = DefaultRouter()
router.register(r'scans', views.ScanViewSet, basename='scan')
router.register(r'websites', views.WebsiteViewSet, basename='website')
router.register(r'issues', views.IssueViewSet, basename='issue')

urlpatterns = [
    path('', views.api_root, name='api-root'),
    path('health/', views.health_check, name='health-check'),
    path('', include(router.urls)),
]
@@ -0,0 +1,336 @@
"""
DRF Views for the API.

This module defines API views for scans, websites, and issues.
"""

import logging
from django.db import connection
from django.utils import timezone
from django.core.cache import cache
from rest_framework import viewsets, status, generics
from rest_framework.decorators import api_view, action
from rest_framework.response import Response
from rest_framework.pagination import PageNumberPagination
from rest_framework.throttling import AnonRateThrottle

from websites.models import Website, Scan, Issue, Metric
from .serializers import (
    WebsiteSerializer,
    WebsiteDetailSerializer,
    ScanListSerializer,
    ScanDetailSerializer,
    ScanCreateSerializer,
    IssueSerializer,
    MetricSerializer,
    HealthCheckSerializer,
)

logger = logging.getLogger(__name__)


class ScanRateThrottle(AnonRateThrottle):
    """Custom throttle for scan creation."""
    rate = '10/hour'


class StandardResultsPagination(PageNumberPagination):
    """Standard pagination for list views."""
    page_size = 20
    page_size_query_param = 'page_size'
    max_page_size = 100


class ScanViewSet(viewsets.ModelViewSet):
    """
    ViewSet for Scan operations.

    Endpoints:
    - POST /api/scans/ - Create a new scan
    - GET /api/scans/ - List all scans
    - GET /api/scans/{id}/ - Get scan details
    - DELETE /api/scans/{id}/ - Delete a scan
    """

    queryset = Scan.objects.select_related('website').prefetch_related('issues', 'metrics')
    pagination_class = StandardResultsPagination

    def get_serializer_class(self):
        if self.action == 'list':
            return ScanListSerializer
        elif self.action == 'create':
            return ScanCreateSerializer
        return ScanDetailSerializer

    def get_throttles(self):
        if self.action == 'create':
            return [ScanRateThrottle()]
        return super().get_throttles()

    def create(self, request, *args, **kwargs):
        """
        Create a new scan.

        Request body:
        ```json
        {"url": "https://example.com"}
        ```

        Returns the created scan with pending status.
        The scan will be processed asynchronously.
        """
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        try:
            scan = serializer.save()

            # Return the created scan details
            response_serializer = ScanDetailSerializer(scan)
            return Response(
                response_serializer.data,
                status=status.HTTP_201_CREATED
            )
        except Exception as e:
            logger.exception("Error creating scan")
            return Response(
                {'error': str(e)},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR
            )

    @action(detail=True, methods=['get'])
    def issues(self, request, pk=None):
        """Get all issues for a scan."""
        scan = self.get_object()
        issues = scan.issues.all()

        # Optional filtering
        category = request.query_params.get('category')
        severity = request.query_params.get('severity')
        tool = request.query_params.get('tool')

        if category:
            issues = issues.filter(category=category)
        if severity:
            issues = issues.filter(severity=severity)
        if tool:
            issues = issues.filter(tool=tool)

        serializer = IssueSerializer(issues, many=True)
        return Response(serializer.data)

    @action(detail=True, methods=['get'])
    def metrics(self, request, pk=None):
        """Get all metrics for a scan."""
        scan = self.get_object()
        metrics = scan.metrics.all()

        # Optional filtering by source
        source = request.query_params.get('source')
        if source:
            metrics = metrics.filter(source=source)

        serializer = MetricSerializer(metrics, many=True)
        return Response(serializer.data)

    @action(detail=True, methods=['get'])
    def status(self, request, pk=None):
        """Get just the status of a scan (for polling)."""
        scan = self.get_object()
        return Response({
            'id': str(scan.id),
            'status': scan.status,
            'status_display': scan.get_status_display(),
            'progress': self._get_scan_progress(scan),
        })

    def _get_scan_progress(self, scan):
        """Estimate scan progress based on status and results."""
        if scan.status == 'done':
            return 100
        elif scan.status == 'failed':
            return 0
        elif scan.status == 'running':
            # Estimate based on what data we have
            progress = 10  # Started
            if scan.raw_headers_data:
                progress += 20
            if scan.raw_playwright_data:
                progress += 25
            if scan.raw_lighthouse_data:
                progress += 30
            if scan.raw_zap_data:
                progress += 15
            return min(progress, 95)
        return 0


class WebsiteViewSet(viewsets.ReadOnlyModelViewSet):
    """
    ViewSet for Website operations.

    Endpoints:
    - GET /api/websites/ - List all websites
    - GET /api/websites/{id}/ - Get website details
    - GET /api/websites/{id}/scans/ - Get scans for a website
    """

    queryset = Website.objects.prefetch_related('scans')
    pagination_class = StandardResultsPagination

    def get_serializer_class(self):
        if self.action == 'retrieve':
            return WebsiteDetailSerializer
        return WebsiteSerializer

    @action(detail=True, methods=['get'])
    def scans(self, request, pk=None):
        """Get all scans for a website."""
        website = self.get_object()
        scans = website.scans.all()

        # Apply pagination
        page = self.paginate_queryset(scans)
        if page is not None:
            serializer = ScanListSerializer(page, many=True)
            return self.get_paginated_response(serializer.data)

        serializer = ScanListSerializer(scans, many=True)
        return Response(serializer.data)


class IssueViewSet(viewsets.ReadOnlyModelViewSet):
    """
    ViewSet for Issue operations.

    Endpoints:
    - GET /api/issues/ - List all issues (with filtering)
    - GET /api/issues/{id}/ - Get issue details
    """

    queryset = Issue.objects.select_related('scan', 'scan__website')
    serializer_class = IssueSerializer
    pagination_class = StandardResultsPagination

    def get_queryset(self):
        queryset = super().get_queryset()

        # Filter by scan
        scan_id = self.request.query_params.get('scan')
        if scan_id:
            queryset = queryset.filter(scan_id=scan_id)

        # Filter by category
        category = self.request.query_params.get('category')
        if category:
            queryset = queryset.filter(category=category)

        # Filter by severity
        severity = self.request.query_params.get('severity')
        if severity:
            queryset = queryset.filter(severity=severity)

        # Filter by tool
        tool = self.request.query_params.get('tool')
        if tool:
            queryset = queryset.filter(tool=tool)

        return queryset


@api_view(['GET'])
def health_check(request):
    """
    Health check endpoint.

    Checks:
    - Database connectivity
    - Redis connectivity
    - Celery worker status

    Returns health status of all components.
    """
    health = {
        'status': 'healthy',
        'database': 'unknown',
        'redis': 'unknown',
        'celery': 'unknown',
        'timestamp': timezone.now(),
    }

    # Check database
    try:
        connection.ensure_connection()
        health['database'] = 'healthy'
    except Exception as e:
        health['database'] = f'unhealthy: {e}'
        health['status'] = 'unhealthy'

    # Check Redis
    try:
        cache.set('health_check', 'ok', 10)
        if cache.get('health_check') == 'ok':
            health['redis'] = 'healthy'
        else:
            health['redis'] = 'unhealthy: cache not working'
            health['status'] = 'degraded'
    except Exception as e:
        health['redis'] = f'unhealthy: {e}'
        health['status'] = 'degraded'

    # Check Celery (basic check)
    try:
        from core.celery import app as celery_app
        inspect = celery_app.control.inspect()

        # Try to get active workers
        active = inspect.active()
        if active:
            health['celery'] = f'healthy ({len(active)} workers)'
        else:
            health['celery'] = 'degraded: no active workers'
            health['status'] = 'degraded'
    except Exception as e:
        health['celery'] = f'unknown: {e}'

    status_code = 200 if health['status'] == 'healthy' else 503

    serializer = HealthCheckSerializer(health)
    return Response(serializer.data, status=status_code)


@api_view(['GET'])
def api_root(request):
    """
    API root endpoint.

    Returns available endpoints and basic API information.
    """
    return Response({
        'message': 'Website Analyzer API',
        'version': '1.0.0',
        'endpoints': {
            'scans': '/api/scans/',
            'websites': '/api/websites/',
            'issues': '/api/issues/',
            'health': '/api/health/',
        },
        'documentation': {
            'create_scan': {
                'method': 'POST',
                'url': '/api/scans/',
                'body': {'url': 'https://example.com'},
                'description': 'Create a new website scan'
            },
            'get_scan': {
                'method': 'GET',
                'url': '/api/scans/{id}/',
                'description': 'Get scan results and details'
            },
            'list_scans': {
                'method': 'GET',
                'url': '/api/scans/',
                'description': 'List all scans with pagination'
            },
        }
    })
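To tie the endpoints above together, a minimal client-side sketch using httpx (already a project dependency). The base URL is an assumption for a local run, and the 'done'/'failed' status strings are assumed from the ScanStatus comparisons in _get_scan_progress:

```python
import time

import httpx

BASE = 'http://localhost:8000/api'  # assumed local address

# Create a scan, then poll the lightweight status action until it settles.
created = httpx.post(f'{BASE}/scans/', json={'url': 'https://example.com'})
created.raise_for_status()
scan_id = created.json()['id']

while True:
    state = httpx.get(f'{BASE}/scans/{scan_id}/status/').json()
    if state['status'] in ('done', 'failed'):
        break
    time.sleep(5)

detail = httpx.get(f'{BASE}/scans/{scan_id}/').json()
print(detail['overall_score'], len(detail['issues']))
```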
@@ -0,0 +1,9 @@
"""
Core module initialization.

This module loads the Celery app so that shared_task will use this app.
"""

from .celery import app as celery_app

__all__ = ('celery_app',)
@@ -0,0 +1,11 @@
"""
ASGI config for Website Analyzer project.
"""

import os

from django.core.asgi import get_asgi_application

os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')

application = get_asgi_application()
@@ -0,0 +1,28 @@
"""
Celery configuration for Website Analyzer.

This module configures Celery for asynchronous task processing,
specifically for running website scans in the background.
"""

import os

from celery import Celery

# Set the default Django settings module for the 'celery' program.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')

app = Celery('website_analyzer')

# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
app.config_from_object('django.conf:settings', namespace='CELERY')

# Load task modules from all registered Django apps.
app.autodiscover_tasks()


@app.task(bind=True, ignore_result=True)
def debug_task(self):
    """Debug task for testing Celery connectivity."""
    print(f'Request: {self.request!r}')
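A minimal sketch of verifying this wiring, assuming a worker has been started with `celery -A core worker -l info` and the Redis broker from the settings is reachable:

```python
# Fire the built-in debug task; with ignore_result=True only delivery is confirmed.
from core.celery import debug_task

result = debug_task.delay()
print(result.id)  # task ID assigned when the message is queued
```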
@@ -0,0 +1,300 @@
"""
Django settings for Website Analyzer project.

This module contains all configuration settings for the Django application,
including database, caching, security, and third-party integrations.
"""

import os
from pathlib import Path

from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent

# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = os.getenv('SECRET_KEY', 'django-insecure-change-me-in-production')

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = os.getenv('DEBUG', 'False').lower() in ('true', '1', 'yes')

ALLOWED_HOSTS = os.getenv('ALLOWED_HOSTS', 'localhost,127.0.0.1').split(',')


# Application definition

INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',

    # Third-party apps
    'rest_framework',
    'corsheaders',

    # Local apps
    'websites',
    'scanner',
    'api',
]

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'whitenoise.middleware.WhiteNoiseMiddleware',
    'corsheaders.middleware.CorsMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'core.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [BASE_DIR / 'templates'],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'core.wsgi.application'


# Database
# Parse DATABASE_URL or use default PostgreSQL settings

DATABASE_URL = os.getenv('DATABASE_URL', 'postgres://analyzer:analyzer_password@localhost:5432/website_analyzer')

# Parse the DATABASE_URL
import re
db_pattern = r'postgres://(?P<user>[^:]+):(?P<password>[^@]+)@(?P<host>[^:]+):(?P<port>\d+)/(?P<name>.+)'
db_match = re.match(db_pattern, DATABASE_URL)

if db_match:
    DATABASES = {
        'default': {
            'ENGINE': 'django.db.backends.postgresql',
            'NAME': db_match.group('name'),
            'USER': db_match.group('user'),
            'PASSWORD': db_match.group('password'),
            'HOST': db_match.group('host'),
            'PORT': db_match.group('port'),
        }
    }
else:
    # Fallback for development
    DATABASES = {
        'default': {
            'ENGINE': 'django.db.backends.sqlite3',
            'NAME': BASE_DIR / 'db.sqlite3',
        }
    }


# Password validation
AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]


# Internationalization
LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'UTC'
USE_I18N = True
USE_TZ = True


# Static files (CSS, JavaScript, Images)
STATIC_URL = 'static/'
STATIC_ROOT = BASE_DIR / 'staticfiles'
STATICFILES_DIRS = [BASE_DIR / 'static']
STATICFILES_STORAGE = 'whitenoise.storage.CompressedManifestStaticFilesStorage'

# Default primary key field type
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'


# =============================================================================
# REST Framework Configuration
# =============================================================================
REST_FRAMEWORK = {
    'DEFAULT_RENDERER_CLASSES': [
        'rest_framework.renderers.JSONRenderer',
        'rest_framework.renderers.BrowsableAPIRenderer',
    ],
    'DEFAULT_PAGINATION_CLASS': 'rest_framework.pagination.PageNumberPagination',
    'PAGE_SIZE': 20,
    'DEFAULT_THROTTLE_CLASSES': [
        'rest_framework.throttling.AnonRateThrottle',
        'rest_framework.throttling.UserRateThrottle'
    ],
    'DEFAULT_THROTTLE_RATES': {
        'anon': '100/hour',
        'user': '1000/hour',
        'scan': '10/hour',  # Specific rate for scan creation
    },
    'EXCEPTION_HANDLER': 'api.exceptions.custom_exception_handler',
}


# =============================================================================
# CORS Configuration
# =============================================================================
CORS_ALLOWED_ORIGINS = os.getenv(
    'CORS_ALLOWED_ORIGINS',
    'http://localhost:3000,http://localhost:8000'
).split(',')
CORS_ALLOW_CREDENTIALS = True


# =============================================================================
# Celery Configuration
# =============================================================================
CELERY_BROKER_URL = os.getenv('CELERY_BROKER_URL', 'redis://localhost:6379/0')
CELERY_RESULT_BACKEND = os.getenv('CELERY_RESULT_BACKEND', 'redis://localhost:6379/1')
CELERY_ACCEPT_CONTENT = ['json']
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
CELERY_TIMEZONE = TIME_ZONE
CELERY_TASK_TRACK_STARTED = True
CELERY_TASK_TIME_LIMIT = int(os.getenv('MAX_SCAN_TIME_SECONDS', '300'))
CELERY_TASK_SOFT_TIME_LIMIT = CELERY_TASK_TIME_LIMIT - 30


# =============================================================================
# Redis Cache Configuration
# =============================================================================
REDIS_URL = os.getenv('REDIS_URL', 'redis://localhost:6379/0')
CACHES = {
    'default': {
        'BACKEND': 'django.core.cache.backends.redis.RedisCache',
        'LOCATION': REDIS_URL,
    }
}


# =============================================================================
# Scanner Configuration
# =============================================================================
SCANNER_CONFIG = {
    # OWASP ZAP settings
    'ZAP_API_KEY': os.getenv('ZAP_API_KEY', ''),
    'ZAP_HOST': os.getenv('ZAP_HOST', 'http://localhost:8080'),
    'ZAP_TIMEOUT': 120,

    # Lighthouse settings
    'LIGHTHOUSE_CHROME_FLAGS': os.getenv(
        'LIGHTHOUSE_CHROME_FLAGS',
        '--headless --no-sandbox --disable-gpu'
    ),
    'LIGHTHOUSE_TIMEOUT': 60,

    # Playwright settings
    'PLAYWRIGHT_TIMEOUT': 30000,  # milliseconds
    'PLAYWRIGHT_VIEWPORT': {'width': 1920, 'height': 1080},

    # General scan settings
    'MAX_SCAN_TIME_SECONDS': int(os.getenv('MAX_SCAN_TIME_SECONDS', '300')),
    'SCAN_RATE_LIMIT_MINUTES': int(os.getenv('SCAN_RATE_LIMIT_MINUTES', '5')),
    'MAX_CONCURRENT_SCANS': int(os.getenv('MAX_CONCURRENT_SCANS', '3')),

    # Safety settings - blocked IP ranges (RFC1918 private ranges + localhost)
    'BLOCKED_IP_RANGES': [
        '10.0.0.0/8',
        '172.16.0.0/12',
        '192.168.0.0/16',
        '127.0.0.0/8',
        '169.254.0.0/16',  # Link-local
        '::1/128',  # IPv6 localhost
        'fc00::/7',  # IPv6 private
        'fe80::/10',  # IPv6 link-local
    ],
    'BLOCKED_HOSTS': ['localhost', 'localhost.localdomain'],

    # Large file thresholds
    'LARGE_IMAGE_THRESHOLD_BYTES': 1024 * 1024,  # 1 MB
    'LARGE_JS_BUNDLE_THRESHOLD_BYTES': 500 * 1024,  # 500 KB
}


# =============================================================================
# Logging Configuration
# =============================================================================
LOGGING = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': {
        'verbose': {
            'format': '{levelname} {asctime} {module} {process:d} {thread:d} {message}',
            'style': '{',
        },
        'simple': {
            'format': '{levelname} {asctime} {module} {message}',
            'style': '{',
        },
    },
    'handlers': {
        'console': {
            'class': 'logging.StreamHandler',
            'formatter': 'simple',
        },
        'file': {
            'class': 'logging.FileHandler',
            'filename': BASE_DIR / 'logs' / 'django.log',
            'formatter': 'verbose',
        },
    },
    'root': {
        'handlers': ['console'],
        'level': 'INFO',
    },
    'loggers': {
        'django': {
            'handlers': ['console'],
            'level': os.getenv('DJANGO_LOG_LEVEL', 'INFO'),
            'propagate': False,
        },
        'scanner': {
            'handlers': ['console'],
            'level': 'DEBUG' if DEBUG else 'INFO',
            'propagate': False,
        },
        'celery': {
            'handlers': ['console'],
            'level': 'INFO',
            'propagate': False,
        },
    },
}

# Create logs directory if it doesn't exist
(BASE_DIR / 'logs').mkdir(exist_ok=True)
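SCANNER_CONFIG is the single place the scanners read their knobs from. As a minimal sketch of consuming it from application code (the helper name here is hypothetical):

```python
from django.conf import settings


def scan_budget_seconds() -> int:
    """Return the per-scan time budget configured in SCANNER_CONFIG."""
    return settings.SCANNER_CONFIG['MAX_SCAN_TIME_SECONDS']
```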
@@ -0,0 +1,20 @@
"""
URL configuration for Website Analyzer project.
"""

from django.contrib import admin
from django.urls import path, include
from django.views.generic import TemplateView


urlpatterns = [
    # Admin
    path('admin/', admin.site.urls),

    # API endpoints
    path('api/', include('api.urls')),

    # Frontend views
    path('', TemplateView.as_view(template_name='index.html'), name='home'),
    path('scan/<uuid:scan_id>/', TemplateView.as_view(template_name='scan_detail.html'), name='scan_detail'),
]
@@ -0,0 +1,11 @@
"""
WSGI config for Website Analyzer project.
"""

import os

from django.core.wsgi import get_wsgi_application

os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')

application = get_wsgi_application()
@@ -0,0 +1,22 @@
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys


def main():
    """Run administrative tasks."""
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        raise ImportError(
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        ) from exc
    execute_from_command_line(sys.argv)


if __name__ == '__main__':
    main()
@@ -0,0 +1,91 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "website-analyzer"
version = "1.0.0"
description = "A Django-based web application for analyzing website performance, security, and best practices"
readme = "README.md"
license = {text = "MIT"}
requires-python = ">=3.11"
authors = [
    {name = "Website Analyzer Team"}
]
classifiers = [
    "Development Status :: 4 - Beta",
    "Framework :: Django :: 5.0",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
]

dependencies = [
    "Django>=5.0,<6.0",
    "djangorestframework>=3.14.0",
    "django-cors-headers>=4.3.0",
    "psycopg2-binary>=2.9.9",
    "celery[redis]>=5.3.0",
    "redis>=5.0.0",
    "httpx>=0.26.0",
    "playwright>=1.40.0",
    "python-dotenv>=1.0.0",
    "gunicorn>=21.2.0",
    "whitenoise>=6.6.0",
    "validators>=0.22.0",
]

[project.optional-dependencies]
dev = [
    "pytest>=7.4.0",
    "pytest-django>=4.7.0",
    "pytest-asyncio>=0.23.0",
    "pytest-cov>=4.1.0",
    "black>=23.12.0",
    "isort>=5.13.0",
    "flake8>=7.0.0",
    "mypy>=1.8.0",
    "django-stubs>=4.2.0",
]

[tool.black]
line-length = 100
target-version = ['py311']
include = '\.pyi?$'
exclude = '''
/(
    \.git
  | \.hg
  | \.mypy_cache
  | \.tox
  | \.venv
  | _build
  | buck-out
  | build
  | dist
  | migrations
)/
'''

[tool.isort]
profile = "black"
line_length = 100
skip = ["migrations", ".venv"]

[tool.pytest.ini_options]
DJANGO_SETTINGS_MODULE = "core.settings"
python_files = ["test_*.py", "*_test.py"]
addopts = "-v --tb=short"

[tool.mypy]
python_version = "3.11"
plugins = ["mypy_django_plugin.main"]
ignore_missing_imports = true
strict = false

[tool.django-stubs]
django_settings_module = "core.settings"
@@ -0,0 +1,36 @@
# Django & REST Framework
Django>=5.0,<6.0
djangorestframework>=3.14.0
django-cors-headers>=4.3.0

# Database
psycopg2-binary>=2.9.9

# Async Task Queue
celery[redis]>=5.3.0
redis>=5.0.0

# HTTP Client
httpx>=0.26.0

# Browser Automation
playwright>=1.40.0

# Environment & Config
python-dotenv>=1.0.0

# Production Server
gunicorn>=21.2.0
whitenoise>=6.6.0

# Validation & Utilities
validators>=0.22.0

# Development & Testing
pytest>=7.4.0
pytest-django>=4.7.0
pytest-asyncio>=0.23.0
pytest-cov>=4.1.0
black>=23.12.0
isort>=5.13.0
flake8>=7.0.0
@@ -0,0 +1,5 @@
"""
Scanner app initialization.
"""

default_app_config = 'scanner.apps.ScannerConfig'
@@ -0,0 +1,11 @@
"""
Scanner app configuration.
"""

from django.apps import AppConfig


class ScannerConfig(AppConfig):
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'scanner'
    verbose_name = 'Scanner Tools'
@@ -0,0 +1,25 @@
"""
Scanner modules initialization.

This package contains the various scanner implementations
that analyze websites for performance, security, and best practices.
"""

from .base import BaseScanner, ScannerResult
from .lighthouse import LighthouseScanner
from .playwright_scanner import PlaywrightScanner
from .zap import ZAPScanner
from .headers import HeaderScanner
from .tls import TLSScanner
from .runner import ScanRunner

__all__ = [
    'BaseScanner',
    'ScannerResult',
    'LighthouseScanner',
    'PlaywrightScanner',
    'ZAPScanner',
    'HeaderScanner',
    'TLSScanner',
    'ScanRunner',
]
@@ -0,0 +1,161 @@
"""
Base scanner interface and result structures.

All scanner implementations should inherit from BaseScanner
and return ScannerResult objects.
"""

import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
from enum import Enum

logger = logging.getLogger(__name__)


class ScannerStatus(str, Enum):
    """Status of a scanner execution."""
    SUCCESS = "success"
    PARTIAL = "partial"
    FAILED = "failed"
    SKIPPED = "skipped"


@dataclass
class IssueData:
    """
    Represents a single issue found by a scanner.

    Attributes:
        category: Issue category (security, performance, etc.)
        severity: Issue severity (critical, high, medium, low, info)
        title: Brief title of the issue
        description: Detailed description
        tool: The scanner that found this issue
        affected_url: Specific URL affected (optional)
        remediation: Suggested fix (optional)
        raw_data: Original scanner data (optional)
    """
    category: str
    severity: str
    title: str
    description: str
    tool: str
    affected_url: Optional[str] = None
    remediation: Optional[str] = None
    raw_data: Optional[Dict[str, Any]] = None


@dataclass
class MetricData:
    """
    Represents a single metric measured by a scanner.

    Attributes:
        name: Internal name (e.g., 'first_contentful_paint_ms')
        display_name: Human-readable name
        value: Numeric value
        unit: Unit of measurement
        source: The scanner that measured this
        score: Normalized score (0-1) if available
    """
    name: str
    display_name: str
    value: float
    unit: str
    source: str
    score: Optional[float] = None


@dataclass
class ScannerResult:
    """
    Result of a scanner execution.

    Attributes:
        scanner_name: Name of the scanner
        status: Execution status
        issues: List of issues found
        metrics: List of metrics measured
        scores: Dictionary of category scores
        raw_data: Original scanner output
        error_message: Error details if failed
    """
    scanner_name: str
    status: ScannerStatus
    issues: List[IssueData] = field(default_factory=list)
    metrics: List[MetricData] = field(default_factory=list)
    scores: Dict[str, int] = field(default_factory=dict)
    raw_data: Optional[Dict[str, Any]] = None
    error_message: Optional[str] = None


class BaseScanner(ABC):
    """
    Abstract base class for all scanners.

    Each scanner implementation must implement the `run` method
    which performs the actual scan and returns a ScannerResult.
    """

    name: str = "base"

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Initialize the scanner with optional configuration.

        Args:
            config: Scanner-specific configuration dictionary
        """
        self.config = config or {}
        self.logger = logging.getLogger(f"scanner.{self.name}")

    @abstractmethod
    def run(self, url: str) -> ScannerResult:
        """
        Run the scanner against the given URL.

        Args:
            url: The URL to scan

        Returns:
            ScannerResult with findings, metrics, and status
        """
        pass

    def is_available(self) -> bool:
        """
        Check if the scanner service/tool is available.

        Returns:
            True if the scanner can be used, False otherwise
        """
        return True

    def _create_error_result(self, error: Exception) -> ScannerResult:
        """
        Create a failed result from an exception.

        Args:
            error: The exception that occurred

        Returns:
            ScannerResult with failed status
        """
        self.logger.error(f"Scanner {self.name} failed: {error}")
        return ScannerResult(
            scanner_name=self.name,
            status=ScannerStatus.FAILED,
            error_message=str(error),
            issues=[
                IssueData(
                    category="scanner",
                    severity="info",
                    title=f"{self.name.title()} scan failed",
                    description=f"The {self.name} scanner encountered an error: {error}",
                    tool=self.name,
                    remediation="Check scanner service configuration and availability."
                )
            ]
        )
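A minimal sketch of a concrete scanner built on this interface; the NoopScanner name, its single metric, and the dotted import path (inferred from the package layout) are hypothetical and only illustrate the run() contract:

```python
from scanner.scanners.base import BaseScanner, MetricData, ScannerResult, ScannerStatus


class NoopScanner(BaseScanner):
    """Hypothetical scanner that records one constant metric."""

    name = "noop"

    def run(self, url: str) -> ScannerResult:
        try:
            metric = MetricData(
                name="noop_checks",
                display_name="No-op checks",
                value=1.0,
                unit="count",
                source=self.name,
            )
            return ScannerResult(
                scanner_name=self.name,
                status=ScannerStatus.SUCCESS,
                metrics=[metric],
            )
        except Exception as exc:
            # Reuse the shared failure path provided by BaseScanner.
            return self._create_error_result(exc)
```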
@ -0,0 +1,405 @@
|
|||
"""
|
||||
HTTP Header Security Scanner.
|
||||
|
||||
This module analyzes HTTP response headers for security
|
||||
best practices and common misconfigurations.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import httpx
|
||||
|
||||
from .base import (
|
||||
BaseScanner,
|
||||
ScannerResult,
|
||||
ScannerStatus,
|
||||
IssueData,
|
||||
MetricData,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Security header definitions with expected values and severity
|
||||
SECURITY_HEADERS = {
|
||||
'Strict-Transport-Security': {
|
||||
'severity': 'high',
|
||||
'description': 'HTTP Strict Transport Security (HSTS) forces browsers to use HTTPS.',
|
||||
'remediation': (
|
||||
'Add the header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload'
|
||||
),
|
||||
'check_value': lambda v: 'max-age' in v.lower() and int(
|
||||
v.lower().split('max-age=')[1].split(';')[0].strip()
|
||||
) >= 31536000 if 'max-age=' in v.lower() else False,
|
||||
},
|
||||
'Content-Security-Policy': {
|
||||
'severity': 'high',
|
||||
'description': 'Content Security Policy (CSP) helps prevent XSS and data injection attacks.',
|
||||
'remediation': (
|
||||
"Implement a Content-Security-Policy header that restricts sources for scripts, "
|
||||
"styles, and other resources. Start with a report-only policy to identify issues."
|
||||
),
|
||||
'check_value': lambda v: "default-src" in v.lower() or "script-src" in v.lower(),
|
||||
},
|
||||
'X-Content-Type-Options': {
|
||||
'severity': 'medium',
|
||||
'description': 'Prevents browsers from MIME-sniffing responses.',
|
||||
'remediation': 'Add the header: X-Content-Type-Options: nosniff',
|
||||
'check_value': lambda v: v.lower() == 'nosniff',
|
||||
},
|
||||
'X-Frame-Options': {
|
||||
'severity': 'medium',
|
||||
'description': 'Protects against clickjacking by controlling page framing.',
|
||||
'remediation': 'Add the header: X-Frame-Options: DENY or SAMEORIGIN',
|
||||
'check_value': lambda v: v.upper() in ['DENY', 'SAMEORIGIN'],
|
||||
},
|
||||
'Referrer-Policy': {
|
||||
'severity': 'low',
|
||||
'description': 'Controls how much referrer information is sent with requests.',
|
||||
'remediation': (
|
||||
'Add the header: Referrer-Policy: strict-origin-when-cross-origin '
|
||||
'or no-referrer-when-downgrade'
|
||||
),
|
||||
'check_value': lambda v: v.lower() in [
|
||||
'no-referrer', 'no-referrer-when-downgrade',
|
||||
'strict-origin', 'strict-origin-when-cross-origin',
|
||||
'same-origin', 'origin', 'origin-when-cross-origin'
|
||||
],
|
||||
},
|
||||
'Permissions-Policy': {
|
||||
'severity': 'low',
|
||||
'description': 'Controls which browser features can be used.',
|
||||
'remediation': (
|
||||
'Add a Permissions-Policy header to restrict access to sensitive browser APIs '
|
||||
'like geolocation, camera, and microphone.'
|
||||
),
|
||||
'check_value': lambda v: len(v) > 0,
|
||||
},
|
||||
'X-XSS-Protection': {
|
||||
'severity': 'info',
|
||||
'description': 'Legacy XSS filter (deprecated in modern browsers, CSP is preferred).',
|
||||
'remediation': 'While deprecated, you can add: X-XSS-Protection: 1; mode=block',
|
||||
'check_value': lambda v: '1' in v,
|
||||
},
|
||||
}
|
||||
|
||||
# CORS security checks
|
||||
CORS_CHECKS = {
|
||||
'permissive_origin': {
|
||||
'severity': 'high',
|
||||
'title': 'Overly permissive CORS (Access-Control-Allow-Origin: *)',
|
||||
'description': (
|
||||
'The server allows requests from any origin. This can expose sensitive data '
|
||||
'to malicious websites if combined with credentials.'
|
||||
),
|
||||
'remediation': (
|
||||
'Restrict Access-Control-Allow-Origin to specific trusted domains instead of using *. '
|
||||
'Never use * with Access-Control-Allow-Credentials: true.'
|
||||
),
|
||||
},
|
||||
'credentials_with_wildcard': {
|
||||
'severity': 'critical',
|
||||
'title': 'CORS allows credentials with wildcard origin',
|
||||
'description': (
|
||||
'The server has Access-Control-Allow-Credentials: true with Access-Control-Allow-Origin: *. '
|
||||
'This is a severe misconfiguration that can allow credential theft.'
|
||||
),
|
||||
'remediation': (
|
||||
'Never combine Access-Control-Allow-Credentials: true with a wildcard origin. '
|
||||
'Implement a whitelist of allowed origins.'
|
||||
),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class HeaderScanner(BaseScanner):
|
||||
"""
|
||||
Scanner for HTTP security headers.
|
||||
|
||||
Checks for:
|
||||
- Missing security headers
|
||||
- Improperly configured headers
|
||||
- CORS misconfigurations
|
||||
- Cookie security flags
|
||||
"""
|
||||
|
||||
name = "header_check"
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||
super().__init__(config)
|
||||
self.timeout = self.config.get('timeout', 30)
|
||||
|
||||
def run(self, url: str) -> ScannerResult:
|
||||
"""
|
||||
Run header security analysis on the URL.
|
||||
|
||||
Args:
|
||||
url: The URL to analyze
|
||||
|
||||
Returns:
|
||||
ScannerResult with header findings
|
||||
"""
|
||||
self.logger.info(f"Starting header scan for {url}")
|
||||
|
||||
try:
|
||||
# Make both GET and HEAD requests
|
||||
headers_data = self._fetch_headers(url)
|
||||
|
||||
issues = []
|
||||
metrics = []
|
||||
|
||||
# Check security headers
|
||||
header_issues, header_score = self._check_security_headers(
|
||||
headers_data['headers']
|
||||
)
|
||||
issues.extend(header_issues)
|
||||
|
||||
# Check CORS configuration
|
||||
cors_issues = self._check_cors(headers_data['headers'], url)
|
||||
issues.extend(cors_issues)
|
||||
|
||||
# Check cookies
|
||||
cookie_issues = self._check_cookies(headers_data['headers'], url)
|
||||
issues.extend(cookie_issues)
|
||||
|
||||
# Create metrics
|
||||
metrics.append(MetricData(
|
||||
name='security_headers_score',
|
||||
display_name='Security Headers Score',
|
||||
value=float(header_score),
|
||||
unit='percent',
|
||||
source='header_check'
|
||||
))
|
||||
|
||||
metrics.append(MetricData(
|
||||
name='headers_missing_count',
|
||||
display_name='Missing Security Headers',
|
||||
value=float(len([i for i in header_issues if 'missing' in i.title.lower()])),
|
||||
unit='count',
|
||||
source='header_check'
|
||||
))
|
||||
|
||||
self.logger.info(
|
||||
f"Header scan complete: {len(issues)} issues, score: {header_score}"
|
||||
)
|
||||
|
||||
return ScannerResult(
|
||||
scanner_name=self.name,
|
||||
status=ScannerStatus.SUCCESS,
|
||||
issues=issues,
|
||||
metrics=metrics,
|
||||
raw_data=headers_data
|
||||
)
|
||||
|
||||
except httpx.TimeoutException:
|
||||
return self._create_error_result(Exception("Header check timed out"))
|
||||
except Exception as e:
|
||||
return self._create_error_result(e)
|
||||
|
||||
def _fetch_headers(self, url: str) -> Dict[str, Any]:
|
||||
"""Fetch headers from the URL."""
|
||||
with httpx.Client(
|
||||
timeout=self.timeout,
|
||||
follow_redirects=True,
|
||||
verify=True
|
||||
) as client:
|
||||
# GET request
|
||||
get_response = client.get(url)
|
||||
|
||||
# HEAD request
|
||||
head_response = client.head(url)
|
||||
|
||||
return {
|
||||
'url': str(get_response.url),
|
||||
'status_code': get_response.status_code,
|
||||
'headers': dict(get_response.headers),
|
||||
'head_headers': dict(head_response.headers),
|
||||
'redirected': str(get_response.url) != url,
|
||||
'redirect_history': [str(r.url) for r in get_response.history],
|
||||
}
|
||||
|
||||
def _check_security_headers(
|
||||
self,
|
||||
headers: Dict[str, str]
|
||||
) -> Tuple[List[IssueData], int]:
|
||||
"""
|
||||
Check for security headers.
|
||||
|
||||
Returns:
|
||||
Tuple of (list of issues, security score 0-100)
|
||||
"""
|
||||
issues = []
|
||||
score = 100
|
||||
headers_lower = {k.lower(): v for k, v in headers.items()}
|
||||
|
||||
for header_name, config in SECURITY_HEADERS.items():
|
||||
header_key = header_name.lower()
|
||||
|
||||
if header_key not in headers_lower:
|
||||
# Missing header
|
||||
severity = config['severity']
|
||||
deduction = {'critical': 20, 'high': 15, 'medium': 10, 'low': 5, 'info': 2}
|
||||
score -= deduction.get(severity, 5)
|
||||
|
||||
issues.append(IssueData(
|
||||
category='headers',
|
||||
severity=severity,
|
||||
title=f'Missing security header: {header_name}',
|
||||
description=config['description'],
|
||||
tool='header_check',
|
||||
remediation=config['remediation'],
|
||||
raw_data={'header': header_name, 'status': 'missing'}
|
||||
))
|
||||
else:
|
||||
# Header present, check value
|
||||
value = headers_lower[header_key]
|
||||
check_func = config.get('check_value')
|
||||
|
||||
if check_func and not check_func(value):
|
||||
issues.append(IssueData(
|
||||
category='headers',
|
||||
severity='low',
|
||||
title=f'Weak configuration: {header_name}',
|
||||
description=(
|
||||
f"{config['description']} "
|
||||
f"Current value may not provide optimal protection: {value}"
|
||||
),
|
||||
tool='header_check',
|
||||
remediation=config['remediation'],
|
||||
raw_data={'header': header_name, 'value': value, 'status': 'weak'}
|
||||
))
|
||||
score -= 3
|
||||
|
||||
return issues, max(0, score)
|
||||
|
||||
def _check_cors(self, headers: Dict[str, str], url: str) -> List[IssueData]:
|
||||
"""Check CORS configuration for issues."""
|
||||
issues = []
|
||||
headers_lower = {k.lower(): v for k, v in headers.items()}
|
||||
|
||||
acao = headers_lower.get('access-control-allow-origin', '')
|
||||
acac = headers_lower.get('access-control-allow-credentials', '')
|
||||
|
||||
if acao == '*':
|
||||
if acac.lower() == 'true':
|
||||
# Critical: credentials with wildcard
|
||||
check = CORS_CHECKS['credentials_with_wildcard']
|
||||
issues.append(IssueData(
|
||||
category='cors',
|
||||
severity=check['severity'],
|
||||
title=check['title'],
|
||||
description=check['description'],
|
||||
tool='header_check',
|
||||
affected_url=url,
|
||||
remediation=check['remediation'],
|
||||
raw_data={
|
||||
'Access-Control-Allow-Origin': acao,
|
||||
'Access-Control-Allow-Credentials': acac
|
||||
}
|
||||
))
|
||||
else:
|
||||
# Warning: permissive origin
|
||||
check = CORS_CHECKS['permissive_origin']
|
||||
issues.append(IssueData(
|
||||
category='cors',
|
||||
severity='medium', # Lower severity without credentials
|
||||
title=check['title'],
|
||||
description=check['description'],
|
||||
tool='header_check',
|
||||
affected_url=url,
|
||||
remediation=check['remediation'],
|
||||
raw_data={'Access-Control-Allow-Origin': acao}
|
||||
))
|
||||
|
||||
return issues
|
||||
|
||||
def _check_cookies(self, headers: Dict[str, str], url: str) -> List[IssueData]:
|
||||
"""Check Set-Cookie headers for security flags."""
|
||||
issues = []
|
||||
headers_lower = {k.lower(): v for k, v in headers.items()}
|
||||
|
||||
# Get all Set-Cookie headers
|
||||
set_cookies = []
|
||||
for key, value in headers.items():
|
||||
if key.lower() == 'set-cookie':
|
||||
set_cookies.append(value)
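# Note: a plain Dict[str, str] carries at most one value per header name, so
# responses that set several cookies may be only partially checked here, and
# the substring checks below are heuristic (e.g. a cookie value containing
# 'secure' would satisfy the Secure-flag check).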
|
||||
|
||||
is_https = url.startswith('https://')
|
||||
|
||||
for cookie in set_cookies:
|
||||
cookie_lower = cookie.lower()
|
||||
cookie_name = cookie.split('=')[0] if '=' in cookie else 'unknown'
|
||||
|
||||
cookie_issues = []
|
||||
|
||||
# Check Secure flag on HTTPS
|
||||
if is_https and 'secure' not in cookie_lower:
|
||||
cookie_issues.append({
|
||||
'flag': 'Secure',
|
||||
'description': (
|
||||
'Cookie is set without Secure flag on HTTPS site. '
|
||||
'This allows the cookie to be sent over unencrypted connections.'
|
||||
),
|
||||
'severity': 'high'
|
||||
})
|
||||
|
||||
# Check HttpOnly flag (important for session cookies)
|
||||
if 'httponly' not in cookie_lower:
|
||||
# Check if it might be a session cookie
|
||||
if any(term in cookie_name.lower() for term in ['session', 'auth', 'token', 'user']):
|
||||
cookie_issues.append({
|
||||
'flag': 'HttpOnly',
|
||||
'description': (
|
||||
'Session-like cookie is set without HttpOnly flag. '
|
||||
'This allows JavaScript access, increasing XSS risk.'
|
||||
),
|
||||
'severity': 'high'
|
||||
})
|
||||
else:
|
||||
cookie_issues.append({
|
||||
'flag': 'HttpOnly',
|
||||
'description': (
|
||||
'Cookie is set without HttpOnly flag. '
|
||||
'Consider adding it unless JavaScript needs access.'
|
||||
),
|
||||
'severity': 'low'
|
||||
})
|
||||
|
||||
# Check SameSite attribute
|
||||
if 'samesite' not in cookie_lower:
|
||||
cookie_issues.append({
|
||||
'flag': 'SameSite',
|
||||
'description': (
|
||||
'Cookie is set without SameSite attribute. '
|
||||
'This can enable CSRF attacks in some scenarios.'
|
||||
),
|
||||
'severity': 'medium'
|
||||
})
|
||||
elif 'samesite=none' in cookie_lower and 'secure' not in cookie_lower:
|
||||
cookie_issues.append({
|
||||
'flag': 'SameSite=None without Secure',
|
||||
'description': (
|
||||
'Cookie has SameSite=None but no Secure flag. '
|
||||
'Modern browsers will reject this cookie.'
|
||||
),
|
||||
'severity': 'medium'
|
||||
})
|
||||
|
||||
# Create issues for this cookie
|
||||
for ci in cookie_issues:
|
||||
issues.append(IssueData(
|
||||
category='security',
|
||||
severity=ci['severity'],
|
||||
title=f"Cookie '{cookie_name}' missing {ci['flag']} flag",
|
||||
description=ci['description'],
|
||||
tool='header_check',
|
||||
affected_url=url,
|
||||
remediation=(
|
||||
f"Add the {ci['flag']} flag to the Set-Cookie header. "
|
||||
f"Example: Set-Cookie: {cookie_name}=value; Secure; HttpOnly; SameSite=Strict"
|
||||
),
|
||||
raw_data={'cookie': cookie[:200]} # Truncate for storage
|
||||
))
|
||||
|
||||
return issues
|
||||
|
|
@ -0,0 +1,323 @@
|
|||
"""
|
||||
Lighthouse Scanner Integration.
|
||||
|
||||
This module integrates with Google Lighthouse to measure
|
||||
performance, accessibility, SEO, and best practices.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from .base import (
|
||||
BaseScanner,
|
||||
ScannerResult,
|
||||
ScannerStatus,
|
||||
IssueData,
|
||||
MetricData,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LighthouseScanner(BaseScanner):
|
||||
"""
|
||||
Scanner that uses Google Lighthouse for performance analysis.
|
||||
|
||||
Communicates with the Lighthouse service container via HTTP API.
|
||||
Collects performance metrics, Core Web Vitals, and various audits.
|
||||
"""
|
||||
|
||||
name = "lighthouse"
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||
super().__init__(config)
|
||||
self.service_url = self.config.get(
|
||||
'service_url',
|
||||
'http://lighthouse:3001'
|
||||
)
|
||||
self.timeout = self.config.get('timeout', 120)
|
||||
|
||||
def is_available(self) -> bool:
|
||||
"""Check if Lighthouse service is available."""
|
||||
try:
|
||||
with httpx.Client(timeout=5) as client:
|
||||
response = client.get(f"{self.service_url}/health")
|
||||
return response.status_code == 200
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Lighthouse service not available: {e}")
|
||||
return False
|
||||
|
||||
def run(self, url: str) -> ScannerResult:
|
||||
"""
|
||||
Run Lighthouse scan against the URL.
|
||||
|
||||
Args:
|
||||
url: The URL to analyze
|
||||
|
||||
Returns:
|
||||
ScannerResult with performance metrics and issues
|
||||
"""
|
||||
self.logger.info(f"Starting Lighthouse scan for {url}")
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=self.timeout) as client:
|
||||
response = client.post(
|
||||
f"{self.service_url}/scan",
|
||||
json={"url": url}
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
return self._parse_results(url, data)
|
||||
|
||||
except httpx.TimeoutException:
|
||||
return self._create_error_result(
|
||||
Exception("Lighthouse scan timed out")
|
||||
)
|
||||
except httpx.HTTPStatusError as e:
|
||||
return self._create_error_result(
|
||||
Exception(f"Lighthouse service error: {e.response.status_code}")
|
||||
)
|
||||
except Exception as e:
|
||||
return self._create_error_result(e)
|
||||
|
||||
def _parse_results(self, url: str, data: Dict[str, Any]) -> ScannerResult:
|
||||
"""
|
||||
Parse Lighthouse results into ScannerResult format.
|
||||
|
||||
Args:
|
||||
url: The scanned URL
|
||||
data: Raw Lighthouse response data
|
||||
|
||||
Returns:
|
||||
Parsed ScannerResult
|
||||
"""
|
||||
issues = []
|
||||
metrics = []
|
||||
|
||||
# Extract scores
|
||||
scores = data.get('scores', {})
|
||||
|
||||
# Extract and create metrics
|
||||
raw_metrics = data.get('metrics', {})
|
||||
|
||||
# Core Web Vitals
|
||||
metric_mappings = [
|
||||
('firstContentfulPaint', 'First Contentful Paint', 'ms'),
|
||||
('largestContentfulPaint', 'Largest Contentful Paint', 'ms'),
|
||||
('speedIndex', 'Speed Index', 'ms'),
|
||||
('timeToInteractive', 'Time to Interactive', 'ms'),
|
||||
('totalBlockingTime', 'Total Blocking Time', 'ms'),
|
||||
('cumulativeLayoutShift', 'Cumulative Layout Shift', 'score'),
|
||||
]
|
||||
|
||||
for key, display_name, unit in metric_mappings:
|
||||
metric_data = raw_metrics.get(key, {})
|
||||
if metric_data and metric_data.get('value') is not None:
|
||||
metrics.append(MetricData(
|
||||
name=self._to_snake_case(key),
|
||||
display_name=display_name,
|
||||
value=metric_data['value'],
|
||||
unit=unit,
|
||||
source='lighthouse',
|
||||
score=metric_data.get('score')
|
||||
))
|
||||
|
||||
# Resource metrics
|
||||
resources = data.get('resources', {})
|
||||
diagnostics = data.get('diagnostics', {})
|
||||
|
||||
if resources.get('totalByteWeight'):
|
||||
metrics.append(MetricData(
|
||||
name='total_byte_weight',
|
||||
display_name='Total Page Weight',
|
||||
value=resources['totalByteWeight'],
|
||||
unit='bytes',
|
||||
source='lighthouse'
|
||||
))
|
||||
|
||||
if resources.get('bootupTime'):
|
||||
metrics.append(MetricData(
|
||||
name='javascript_bootup_time',
|
||||
display_name='JavaScript Boot-up Time',
|
||||
value=resources['bootupTime'],
|
||||
unit='ms',
|
||||
source='lighthouse'
|
||||
))
|
||||
|
||||
if diagnostics.get('numRequests'):
|
||||
metrics.append(MetricData(
|
||||
name='total_requests',
|
||||
display_name='Total Network Requests',
|
||||
value=float(diagnostics['numRequests']),
|
||||
unit='count',
|
||||
source='lighthouse'
|
||||
))
|
||||
|
||||
# Extract issues from failed audits
|
||||
raw_issues = data.get('issues', [])
|
||||
for issue in raw_issues:
|
||||
severity = self._score_to_severity(issue.get('score', 0.5))
|
||||
category = self._map_category(issue.get('category', 'performance'))
|
||||
|
||||
issues.append(IssueData(
|
||||
category=category,
|
||||
severity=severity,
|
||||
title=issue.get('title', 'Unknown issue'),
|
||||
description=issue.get('description', ''),
|
||||
tool='lighthouse',
|
||||
affected_url=url,
|
||||
remediation=self._get_remediation(issue.get('id')),
|
||||
raw_data=issue
|
||||
))
|
||||
|
||||
# Check for large bundles
|
||||
large_scripts = resources.get('scriptTreemap', [])
|
||||
for script in large_scripts[:5]: # Top 5 largest
|
||||
if script.get('resourceBytes', 0) > settings.SCANNER_CONFIG.get(
|
||||
'LARGE_JS_BUNDLE_THRESHOLD_BYTES', 500 * 1024
|
||||
):
|
||||
issues.append(IssueData(
|
||||
category='resources',
|
||||
severity='medium',
|
||||
title=f"Large JavaScript bundle detected",
|
||||
description=(
|
||||
f"The script '{script.get('name', 'Unknown')}' "
|
||||
f"is {script['resourceBytes'] / 1024:.1f} KB. "
|
||||
"Large bundles can slow down page load and increase memory usage."
|
||||
),
|
||||
tool='lighthouse',
|
||||
affected_url=url,
|
||||
remediation=(
|
||||
"Consider code splitting, tree shaking, or lazy loading "
|
||||
"to reduce bundle size."
|
||||
),
|
||||
raw_data=script
|
||||
))
|
||||
|
||||
# Check for unused JavaScript
|
||||
unused_js = resources.get('unusedJavascript', [])
|
||||
if unused_js:
|
||||
total_wasted = sum(u.get('wastedBytes', 0) for u in unused_js)
|
||||
if total_wasted > 100 * 1024: # More than 100KB unused
|
||||
issues.append(IssueData(
|
||||
category='performance',
|
||||
severity='medium',
|
||||
title="Significant unused JavaScript detected",
|
||||
description=(
|
||||
f"Found {total_wasted / 1024:.1f} KB of unused JavaScript "
|
||||
f"across {len(unused_js)} resources. This increases page "
|
||||
"load time and memory usage."
|
||||
),
|
||||
tool='lighthouse',
|
||||
affected_url=url,
|
||||
remediation=(
|
||||
"Remove unused code or use code splitting to load "
|
||||
"JavaScript only when needed."
|
||||
),
|
||||
raw_data={'unused_resources': unused_js}
|
||||
))
|
||||
|
||||
# Check for render-blocking resources
|
||||
blocking = resources.get('renderBlockingResources', [])
|
||||
if blocking:
|
||||
total_wasted_ms = sum(r.get('wastedMs', 0) for r in blocking)
|
||||
if total_wasted_ms > 500:
|
||||
issues.append(IssueData(
|
||||
category='performance',
|
||||
severity='medium',
|
||||
title="Render-blocking resources detected",
|
||||
description=(
|
||||
f"Found {len(blocking)} render-blocking resources "
|
||||
f"adding approximately {total_wasted_ms:.0f}ms to page load. "
|
||||
"These resources delay first paint."
|
||||
),
|
||||
tool='lighthouse',
|
||||
affected_url=url,
|
||||
remediation=(
|
||||
"Consider inlining critical CSS, deferring non-critical JS, "
|
||||
"or using async/defer attributes."
|
||||
),
|
||||
raw_data={'blocking_resources': blocking}
|
||||
))
|
||||
|
||||
self.logger.info(
|
||||
f"Lighthouse scan complete: {len(issues)} issues, {len(metrics)} metrics"
|
||||
)
|
||||
|
||||
return ScannerResult(
|
||||
scanner_name=self.name,
|
||||
status=ScannerStatus.SUCCESS,
|
||||
issues=issues,
|
||||
metrics=metrics,
|
||||
scores={
|
||||
'performance': scores.get('performance', 0),
|
||||
'accessibility': scores.get('accessibility', 0),
|
||||
'best_practices': scores.get('bestPractices', 0),
|
||||
'seo': scores.get('seo', 0),
|
||||
},
|
||||
raw_data=data
|
||||
)
|
||||
|
||||
def _to_snake_case(self, name: str) -> str:
|
||||
"""Convert camelCase to snake_case."""
|
||||
import re
|
||||
s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
|
||||
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
|
||||
|
||||
def _score_to_severity(self, score: Optional[float]) -> str:
|
||||
"""Convert Lighthouse score to severity level."""
|
||||
if score is None:
|
||||
return 'info'
|
||||
elif score < 0.25:
|
||||
return 'high'
|
||||
elif score < 0.5:
|
||||
return 'medium'
|
||||
elif score < 0.75:
|
||||
return 'low'
|
||||
else:
|
||||
return 'info'
|
||||
|
||||
def _map_category(self, lighthouse_category: str) -> str:
|
||||
"""Map Lighthouse category to our category."""
|
||||
mapping = {
|
||||
'performance': 'performance',
|
||||
'accessibility': 'accessibility',
|
||||
'best-practices': 'best_practices',
|
||||
'seo': 'seo',
|
||||
}
|
||||
return mapping.get(lighthouse_category, 'performance')
|
||||
|
||||
def _get_remediation(self, audit_id: str) -> str:
|
||||
"""Get remediation text for known audit IDs."""
|
||||
remediations = {
|
||||
'first-contentful-paint': (
|
||||
"Reduce server response time, eliminate render-blocking resources, "
|
||||
"and optimize critical rendering path."
|
||||
),
|
||||
'largest-contentful-paint': (
|
||||
"Optimize images, preload critical resources, and reduce server "
|
||||
"response time."
|
||||
),
|
||||
'total-blocking-time': (
|
||||
"Reduce JavaScript execution time by breaking up long tasks, "
|
||||
"removing unused code, and minimizing main thread work."
|
||||
),
|
||||
'cumulative-layout-shift': (
|
||||
"Always include size attributes on images and videos, reserve space "
|
||||
"for ad slots, and avoid inserting content above existing content."
|
||||
),
|
||||
'speed-index': (
|
||||
"Minimize main thread work, reduce JavaScript execution time, "
|
||||
"and ensure text remains visible during webfont load."
|
||||
),
|
||||
'interactive': (
|
||||
"Reduce JavaScript payload, defer non-critical scripts, and "
|
||||
"minimize main thread work."
|
||||
),
|
||||
}
|
||||
return remediations.get(audit_id, "Review and optimize based on the audit details.")
|
||||
|
|
@ -0,0 +1,397 @@
|
|||
"""
|
||||
Playwright Scanner Integration.
|
||||
|
||||
This module uses Playwright to perform browser-based analysis,
|
||||
capturing console errors, network requests, and resource metrics.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from .base import (
|
||||
BaseScanner,
|
||||
ScannerResult,
|
||||
ScannerStatus,
|
||||
IssueData,
|
||||
MetricData,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PlaywrightScanner(BaseScanner):
|
||||
"""
|
||||
Scanner using Playwright for browser-based analysis.
|
||||
|
||||
Captures:
|
||||
- Console errors and warnings
|
||||
- Network request details
|
||||
- Page load timing
|
||||
- Large resources (images, scripts)
|
||||
- Memory usage indicators
|
||||
"""
|
||||
|
||||
name = "playwright"
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||
super().__init__(config)
|
||||
self.timeout = self.config.get('timeout', 30000) # 30 seconds
|
||||
self.viewport = self.config.get('viewport', {'width': 1920, 'height': 1080})
|
||||
|
||||
def run(self, url: str) -> ScannerResult:
|
||||
"""
|
||||
Run Playwright analysis on the URL.
|
||||
|
||||
Args:
|
||||
url: The URL to analyze
|
||||
|
||||
Returns:
|
||||
ScannerResult with browser analysis data
|
||||
"""
|
||||
self.logger.info(f"Starting Playwright scan for {url}")
|
||||
|
||||
try:
|
||||
# Run async scan in sync context
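# A fresh event loop is created and installed for this call; asyncio.run()
# would be an equivalent, more modern alternative here.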
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
result = loop.run_until_complete(self._async_scan(url))
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
return self._create_error_result(e)
|
||||
|
||||
async def _async_scan(self, url: str) -> ScannerResult:
|
||||
"""
|
||||
Async implementation of the scan.
|
||||
|
||||
Args:
|
||||
url: The URL to analyze
|
||||
|
||||
Returns:
|
||||
ScannerResult with findings
|
||||
"""
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
issues = []
|
||||
metrics = []
|
||||
raw_data = {
|
||||
'console_messages': [],
|
||||
'network_requests': [],
|
||||
'failed_requests': [],
|
||||
'large_resources': [],
|
||||
}
|
||||
|
||||
async with async_playwright() as p:
|
||||
browser = await p.chromium.launch(
|
||||
headless=True,
|
||||
args=[
|
||||
'--no-sandbox',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
'--disable-gpu',
|
||||
]
|
||||
)
|
||||
|
||||
context = await browser.new_context(
|
||||
viewport=self.viewport,
|
||||
user_agent=(
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
|
||||
'(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
|
||||
)
|
||||
)
|
||||
|
||||
page = await context.new_page()
|
||||
|
||||
# Collect data
|
||||
console_messages = []
|
||||
network_requests = []
|
||||
failed_requests = []
|
||||
|
||||
# Set up event listeners
|
||||
page.on("console", lambda msg: console_messages.append({
|
||||
'type': msg.type,
|
||||
'text': msg.text,
|
||||
'location': str(msg.location) if msg.location else None,
|
||||
}))
|
||||
|
||||
page.on("request", lambda req: network_requests.append({
|
||||
'url': req.url,
|
||||
'method': req.method,
|
||||
'resource_type': req.resource_type,
|
||||
'timestamp': time.time(),
|
||||
}))
|
||||
|
||||
page.on("requestfailed", lambda req: failed_requests.append({
|
||||
'url': req.url,
|
||||
'failure': req.failure,
|
||||
'resource_type': req.resource_type,
|
||||
}))
|
||||
|
||||
# Navigate and measure
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
response = await page.goto(
|
||||
url,
|
||||
wait_until='networkidle',
|
||||
timeout=self.timeout
|
||||
)
|
||||
load_time = (time.time() - start_time) * 1000 # Convert to ms
|
||||
|
||||
# Get response status
|
||||
status_code = response.status if response else 0
|
||||
|
||||
# Wait a bit more for any delayed scripts
|
||||
await page.wait_for_timeout(2000)
|
||||
|
||||
# Get performance timing
|
||||
perf_timing = await page.evaluate('''() => {
|
||||
const timing = performance.timing;
|
||||
const navigation = performance.getEntriesByType("navigation")[0];
|
||||
return {
|
||||
domContentLoaded: timing.domContentLoadedEventEnd - timing.navigationStart,
|
||||
domComplete: timing.domComplete - timing.navigationStart,
|
||||
loadEvent: timing.loadEventEnd - timing.navigationStart,
|
||||
firstPaint: (performance.getEntriesByType("paint").find(p => p.name === "first-paint") || {}).startTime || null,
|
||||
transferSize: navigation ? navigation.transferSize : null,
|
||||
};
|
||||
}''')
|
||||
|
||||
# Get memory info (if available)
|
||||
memory_info = await page.evaluate('''() => {
|
||||
if (performance.memory) {
|
||||
return {
|
||||
usedJSHeapSize: performance.memory.usedJSHeapSize,
|
||||
totalJSHeapSize: performance.memory.totalJSHeapSize,
|
||||
jsHeapSizeLimit: performance.memory.jsHeapSizeLimit,
|
||||
};
|
||||
}
|
||||
return null;
|
||||
}''')
|
||||
|
||||
# Get resource sizes
|
||||
resources = await page.evaluate('''() => {
|
||||
const entries = performance.getEntriesByType("resource");
|
||||
return entries.map(e => ({
|
||||
name: e.name,
|
||||
type: e.initiatorType,
|
||||
transferSize: e.transferSize,
|
||||
duration: e.duration,
|
||||
}));
|
||||
}''')
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Page navigation error: {e}")
|
||||
load_time = self.timeout
|
||||
status_code = 0
|
||||
perf_timing = {}
|
||||
memory_info = None
|
||||
resources = []
|
||||
|
||||
await browser.close()
|
||||
|
||||
# Process collected data
|
||||
raw_data['console_messages'] = console_messages
|
||||
raw_data['network_requests'] = network_requests[:100] # Limit stored
|
||||
raw_data['failed_requests'] = failed_requests
|
||||
raw_data['performance_timing'] = perf_timing
|
||||
raw_data['memory_info'] = memory_info
|
||||
raw_data['status_code'] = status_code
|
||||
|
||||
# Create metrics
|
||||
metrics.append(MetricData(
|
||||
name='page_load_time',
|
||||
display_name='Page Load Time',
|
||||
value=load_time,
|
||||
unit='ms',
|
||||
source='playwright'
|
||||
))
|
||||
|
||||
metrics.append(MetricData(
|
||||
name='total_network_requests',
|
||||
display_name='Total Network Requests',
|
||||
value=float(len(network_requests)),
|
||||
unit='count',
|
||||
source='playwright'
|
||||
))
|
||||
|
||||
# Calculate total transfer size
|
||||
total_transfer = sum(r.get('transferSize', 0) for r in resources if r.get('transferSize'))
|
||||
if total_transfer > 0:
|
||||
metrics.append(MetricData(
|
||||
name='total_transfer_size',
|
||||
display_name='Total Transfer Size',
|
||||
value=float(total_transfer),
|
||||
unit='bytes',
|
||||
source='playwright'
|
||||
))
|
||||
|
||||
if perf_timing.get('domContentLoaded'):
|
||||
metrics.append(MetricData(
|
||||
name='dom_content_loaded',
|
||||
display_name='DOM Content Loaded',
|
||||
value=float(perf_timing['domContentLoaded']),
|
||||
unit='ms',
|
||||
source='playwright'
|
||||
))
|
||||
|
||||
# Memory metrics
|
||||
if memory_info:
|
||||
metrics.append(MetricData(
|
||||
name='js_heap_used',
|
||||
display_name='JS Heap Used',
|
||||
value=float(memory_info.get('usedJSHeapSize', 0)),
|
||||
unit='bytes',
|
||||
source='playwright'
|
||||
))
|
||||
|
||||
# Check for high memory usage
|
||||
heap_used = memory_info.get('usedJSHeapSize', 0)
|
||||
heap_limit = memory_info.get('jsHeapSizeLimit', 1)
|
||||
heap_percent = (heap_used / heap_limit) * 100 if heap_limit > 0 else 0
|
||||
|
||||
if heap_percent > 50:
|
||||
issues.append(IssueData(
|
||||
category='resources',
|
||||
severity='medium',
|
||||
title='High JavaScript memory usage',
|
||||
description=(
|
||||
f'JavaScript is using {heap_used / (1024*1024):.1f} MB '
|
||||
f'({heap_percent:.1f}% of available heap). '
|
||||
'This may indicate memory-heavy operations or potential leaks.'
|
||||
),
|
||||
tool='playwright',
|
||||
affected_url=url,
|
||||
remediation=(
|
||||
'Review JavaScript for memory leaks, optimize data structures, '
|
||||
'and ensure proper cleanup of event listeners and timers.'
|
||||
),
|
||||
raw_data=memory_info
|
||||
))
|
||||
|
||||
# Analyze console messages for errors
|
||||
errors = [m for m in console_messages if m['type'] == 'error']
|
||||
warnings = [m for m in console_messages if m['type'] == 'warning']
|
||||
|
||||
metrics.append(MetricData(
|
||||
name='console_errors_count',
|
||||
display_name='Console Errors',
|
||||
value=float(len(errors)),
|
||||
unit='count',
|
||||
source='playwright'
|
||||
))
|
||||
|
||||
metrics.append(MetricData(
|
||||
name='console_warnings_count',
|
||||
display_name='Console Warnings',
|
||||
value=float(len(warnings)),
|
||||
unit='count',
|
||||
source='playwright'
|
||||
))
|
||||
|
||||
# Create issues for console errors
|
||||
if errors:
|
||||
# Group similar errors
|
||||
error_texts = set(e['text'][:200] for e in errors)
|
||||
for error_text in list(error_texts)[:10]: # Limit to 10 unique errors
|
||||
issues.append(IssueData(
|
||||
category='content',
|
||||
severity='medium',
|
||||
title='JavaScript console error',
|
||||
description=f'JavaScript error logged to console: {error_text}',
|
||||
tool='playwright',
|
||||
affected_url=url,
|
||||
remediation='Review and fix the JavaScript error in your code.',
|
||||
raw_data={'error': error_text}
|
||||
))
|
||||
|
||||
# Check for failed network requests
|
||||
if failed_requests:
|
||||
for req in failed_requests[:5]: # Limit reported
|
||||
issues.append(IssueData(
|
||||
category='content',
|
||||
severity='low',
|
||||
title='Failed network request',
|
||||
description=(
|
||||
f"Request to {req['url'][:100]} failed: {req.get('failure', 'Unknown error')}"
|
||||
),
|
||||
tool='playwright',
|
||||
affected_url=req['url'],
|
||||
remediation='Ensure the resource is available and CORS is configured correctly.',
|
||||
raw_data=req
|
||||
))
|
||||
|
||||
# Find large resources
|
||||
large_threshold = settings.SCANNER_CONFIG.get('LARGE_IMAGE_THRESHOLD_BYTES', 1024 * 1024)
|
||||
large_resources = [
|
||||
r for r in resources
|
||||
if r.get('transferSize', 0) > large_threshold
|
||||
]
|
||||
|
||||
for resource in large_resources[:5]: # Limit reported
|
||||
size_mb = resource['transferSize'] / (1024 * 1024)
|
||||
issues.append(IssueData(
|
||||
category='resources',
|
||||
severity='medium' if size_mb > 2 else 'low',
|
||||
title=f"Large resource detected ({size_mb:.1f} MB)",
|
||||
description=(
|
||||
f"The resource '{resource['name'][-80:]}' is {size_mb:.2f} MB. "
|
||||
"Large resources increase page load time and bandwidth usage."
|
||||
),
|
||||
tool='playwright',
|
||||
affected_url=resource['name'],
|
||||
remediation=(
|
||||
'Optimize images using compression, use appropriate formats (WebP, AVIF), '
|
||||
'implement lazy loading, or consider a CDN.'
|
||||
),
|
||||
raw_data=resource
|
||||
))
|
||||
|
||||
raw_data['large_resources'] = large_resources
|
||||
|
||||
# Count resources by type
|
||||
resource_counts = {}
|
||||
for req in network_requests:
|
||||
rtype = req.get('resource_type', 'other')
|
||||
resource_counts[rtype] = resource_counts.get(rtype, 0) + 1
|
||||
|
||||
raw_data['resource_counts'] = resource_counts
|
||||
|
||||
# Check for excessive requests
|
||||
if len(network_requests) > 100:
|
||||
issues.append(IssueData(
|
||||
category='performance',
|
||||
severity='medium',
|
||||
title='High number of network requests',
|
||||
description=(
|
||||
f'Page made {len(network_requests)} network requests. '
|
||||
'Excessive requests increase page load time and server load.'
|
||||
),
|
||||
tool='playwright',
|
||||
affected_url=url,
|
||||
remediation=(
|
||||
'Consolidate resources, use HTTP/2 multiplexing, implement '
|
||||
'resource bundling, and lazy load non-critical resources.'
|
||||
),
|
||||
raw_data=resource_counts
|
||||
))
|
||||
|
||||
self.logger.info(
|
||||
f"Playwright scan complete: {len(issues)} issues, {len(metrics)} metrics"
|
||||
)
|
||||
|
||||
return ScannerResult(
|
||||
scanner_name=self.name,
|
||||
status=ScannerStatus.SUCCESS,
|
||||
issues=issues,
|
||||
metrics=metrics,
|
||||
raw_data=raw_data
|
||||
)
|
||||
|
|
@ -0,0 +1,314 @@
|
|||
"""
|
||||
Scan Runner - Orchestrates multiple scanners.
|
||||
|
||||
This module coordinates running all enabled scanners against a URL
|
||||
and aggregates their results into a unified report.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from typing import Any, Dict, List, Optional, Type
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from .base import BaseScanner, ScannerResult, ScannerStatus
|
||||
from .lighthouse import LighthouseScanner
|
||||
from .playwright_scanner import PlaywrightScanner
|
||||
from .zap import ZAPScanner
|
||||
from .headers import HeaderScanner
|
||||
from .tls import TLSScanner
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Default scanner classes to run
|
||||
DEFAULT_SCANNERS: List[Type[BaseScanner]] = [
|
||||
LighthouseScanner,
|
||||
PlaywrightScanner,
|
||||
ZAPScanner,
|
||||
HeaderScanner,
|
||||
TLSScanner,
|
||||
]
|
||||
|
||||
|
||||
class ScanRunner:
|
||||
"""
|
||||
Orchestrates running multiple scanners and aggregating results.
|
||||
|
||||
This class manages:
|
||||
- Running enabled scanners in parallel or sequence
|
||||
- Aggregating results from all scanners
|
||||
- Error handling and partial result compilation
|
||||
- Timeout management
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
scanner_classes: Optional[List[Type[BaseScanner]]] = None,
|
||||
config: Optional[Dict[str, Any]] = None,
|
||||
max_workers: int = 3
|
||||
):
|
||||
"""
|
||||
Initialize the scan runner.
|
||||
|
||||
Args:
|
||||
scanner_classes: List of scanner classes to use (defaults to all)
|
||||
config: Configuration dict passed to each scanner
|
||||
max_workers: Maximum concurrent scanner threads
|
||||
"""
|
||||
self.scanner_classes = scanner_classes or DEFAULT_SCANNERS
|
||||
self.config = config or {}
|
||||
self.max_workers = max_workers
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
def run(self, url: str, parallel: bool = True) -> Dict[str, Any]:
|
||||
"""
|
||||
Run all scanners against the URL.
|
||||
|
||||
Args:
|
||||
url: The URL to scan
|
||||
parallel: Whether to run scanners in parallel
|
||||
|
||||
Returns:
|
||||
Aggregated results dictionary containing:
|
||||
- status: Overall scan status
|
||||
- scores: Aggregated scores
|
||||
- issues: All issues from all scanners
|
||||
- metrics: All metrics from all scanners
|
||||
- scanner_results: Individual scanner results
|
||||
- errors: Any scanner errors
|
||||
"""
|
||||
self.logger.info(f"Starting scan runner for {url} with {len(self.scanner_classes)} scanners")
|
||||
|
||||
# Initialize scanners
|
||||
scanners = self._initialize_scanners()
|
||||
|
||||
# Run scanners
|
||||
if parallel:
|
||||
results = self._run_parallel(scanners, url)
|
||||
else:
|
||||
results = self._run_sequential(scanners, url)
|
||||
|
||||
# Aggregate results
|
||||
aggregated = self._aggregate_results(results)
|
||||
|
||||
self.logger.info(
|
||||
f"Scan complete: {len(aggregated['issues'])} issues, "
|
||||
f"{len(aggregated['metrics'])} metrics, "
|
||||
f"status: {aggregated['status']}"
|
||||
)
|
||||
|
||||
return aggregated
|
||||
|
||||
def _initialize_scanners(self) -> List[BaseScanner]:
|
||||
"""Initialize scanner instances."""
|
||||
scanners = []
|
||||
scanner_config = settings.SCANNER_CONFIG
|
||||
|
||||
for scanner_class in self.scanner_classes:
|
||||
try:
|
||||
# Merge default config with scanner-specific config
|
||||
config = {**self.config}
|
||||
|
||||
# Add scanner-specific config
|
||||
if scanner_class == LighthouseScanner:
|
||||
config['service_url'] = 'http://lighthouse:3001'
|
||||
config['timeout'] = scanner_config.get('LIGHTHOUSE_TIMEOUT', 60)
|
||||
elif scanner_class == ZAPScanner:
|
||||
config['zap_host'] = scanner_config.get('ZAP_HOST')
|
||||
config['api_key'] = scanner_config.get('ZAP_API_KEY')
|
||||
config['timeout'] = scanner_config.get('ZAP_TIMEOUT', 120)
|
||||
elif scanner_class == PlaywrightScanner:
|
||||
config['timeout'] = scanner_config.get('PLAYWRIGHT_TIMEOUT', 30000)
|
||||
config['viewport'] = scanner_config.get('PLAYWRIGHT_VIEWPORT', {'width': 1920, 'height': 1080})
|
||||
|
||||
scanner = scanner_class(config=config)
|
||||
scanners.append(scanner)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Failed to initialize {scanner_class.__name__}: {e}")
|
||||
|
||||
return scanners
|
||||
|
||||
def _run_parallel(
|
||||
self,
|
||||
scanners: List[BaseScanner],
|
||||
url: str
|
||||
) -> Dict[str, ScannerResult]:
|
||||
"""Run scanners in parallel using thread pool."""
|
||||
results = {}
|
||||
|
||||
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
|
||||
# Submit all scanner tasks
|
||||
future_to_scanner = {
|
||||
executor.submit(self._run_scanner, scanner, url): scanner
|
||||
for scanner in scanners
|
||||
}
|
||||
|
||||
# Collect results as they complete
|
||||
for future in as_completed(future_to_scanner):
|
||||
scanner = future_to_scanner[future]
|
||||
try:
|
||||
result = future.result()
|
||||
results[scanner.name] = result
|
||||
except Exception as e:
|
||||
self.logger.error(f"Scanner {scanner.name} raised exception: {e}")
|
||||
results[scanner.name] = ScannerResult(
|
||||
scanner_name=scanner.name,
|
||||
status=ScannerStatus.FAILED,
|
||||
error_message=str(e)
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
def _run_sequential(
|
||||
self,
|
||||
scanners: List[BaseScanner],
|
||||
url: str
|
||||
) -> Dict[str, ScannerResult]:
|
||||
"""Run scanners sequentially."""
|
||||
results = {}
|
||||
|
||||
for scanner in scanners:
|
||||
result = self._run_scanner(scanner, url)
|
||||
results[scanner.name] = result
|
||||
|
||||
return results
|
||||
|
||||
def _run_scanner(self, scanner: BaseScanner, url: str) -> ScannerResult:
|
||||
"""Run a single scanner with error handling."""
|
||||
self.logger.info(f"Running scanner: {scanner.name}")
|
||||
|
||||
try:
|
||||
# Check availability first
|
||||
if not scanner.is_available():
|
||||
self.logger.warning(f"Scanner {scanner.name} is not available")
|
||||
return ScannerResult(
|
||||
scanner_name=scanner.name,
|
||||
status=ScannerStatus.SKIPPED,
|
||||
error_message=f"{scanner.name} service is not available"
|
||||
)
|
||||
|
||||
# Run the scanner
|
||||
result = scanner.run(url)
|
||||
self.logger.info(
|
||||
f"Scanner {scanner.name} completed with status: {result.status}"
|
||||
)
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Scanner {scanner.name} failed: {e}")
|
||||
return ScannerResult(
|
||||
scanner_name=scanner.name,
|
||||
status=ScannerStatus.FAILED,
|
||||
error_message=str(e)
|
||||
)
|
||||
|
||||
def _aggregate_results(
|
||||
self,
|
||||
results: Dict[str, ScannerResult]
|
||||
) -> Dict[str, Any]:
|
||||
"""Aggregate results from all scanners."""
|
||||
all_issues = []
|
||||
all_metrics = []
|
||||
all_scores = {}
|
||||
raw_data = {}
|
||||
errors = []
|
||||
|
||||
successful_scanners = 0
|
||||
failed_scanners = 0
|
||||
|
||||
for scanner_name, result in results.items():
|
||||
# Track scanner status
|
||||
if result.status == ScannerStatus.SUCCESS:
|
||||
successful_scanners += 1
|
||||
elif result.status == ScannerStatus.FAILED:
|
||||
failed_scanners += 1
|
||||
if result.error_message:
|
||||
errors.append({
|
||||
'scanner': scanner_name,
|
||||
'error': result.error_message
|
||||
})
|
||||
elif result.status == ScannerStatus.PARTIAL:
|
||||
successful_scanners += 1
|
||||
|
||||
# Collect issues
|
||||
for issue in result.issues:
|
||||
all_issues.append({
|
||||
'category': issue.category,
|
||||
'severity': issue.severity,
|
||||
'title': issue.title,
|
||||
'description': issue.description,
|
||||
'tool': issue.tool,
|
||||
'affected_url': issue.affected_url,
|
||||
'remediation': issue.remediation,
|
||||
'raw_data': issue.raw_data,
|
||||
})
|
||||
|
||||
# Collect metrics
|
||||
for metric in result.metrics:
|
||||
all_metrics.append({
|
||||
'name': metric.name,
|
||||
'display_name': metric.display_name,
|
||||
'value': metric.value,
|
||||
'unit': metric.unit,
|
||||
'source': metric.source,
|
||||
'score': metric.score,
|
||||
})
|
||||
|
||||
# Collect scores
|
||||
if result.scores:
|
||||
all_scores[scanner_name] = result.scores
|
||||
|
||||
# Store raw data
|
||||
if result.raw_data:
|
||||
raw_data[scanner_name] = result.raw_data
|
||||
|
||||
# Determine overall status
|
||||
if failed_scanners == len(results):
|
||||
overall_status = 'failed'
|
||||
elif failed_scanners > 0:
|
||||
overall_status = 'partial'
|
||||
else:
|
||||
overall_status = 'done'
|
||||
|
||||
# Calculate aggregated scores
|
||||
aggregated_scores = self._calculate_aggregated_scores(all_scores)
|
||||
|
||||
return {
|
||||
'status': overall_status,
|
||||
'scores': aggregated_scores,
|
||||
'issues': all_issues,
|
||||
'metrics': all_metrics,
|
||||
'scanner_results': {
|
||||
name: {
|
||||
'status': result.status.value,
|
||||
'error': result.error_message,
|
||||
}
|
||||
for name, result in results.items()
|
||||
},
|
||||
'raw_data': raw_data,
|
||||
'errors': errors,
|
||||
'summary': {
|
||||
'total_scanners': len(results),
|
||||
'successful': successful_scanners,
|
||||
'failed': failed_scanners,
|
||||
'total_issues': len(all_issues),
|
||||
'total_metrics': len(all_metrics),
|
||||
}
|
||||
}
|
||||
|
||||
def _calculate_aggregated_scores(
|
||||
self,
|
||||
scanner_scores: Dict[str, Dict[str, int]]
|
||||
) -> Dict[str, Optional[int]]:
|
||||
"""Calculate aggregated scores from all scanners."""
|
||||
# Lighthouse provides the main scores
|
||||
lighthouse_scores = scanner_scores.get('lighthouse', {})
|
||||
|
||||
return {
|
||||
'performance': lighthouse_scores.get('performance'),
|
||||
'accessibility': lighthouse_scores.get('accessibility'),
|
||||
'best_practices': lighthouse_scores.get('best_practices'),
|
||||
'seo': lighthouse_scores.get('seo'),
|
||||
}
|
||||
|
|
@ -0,0 +1,380 @@
|
|||
"""
|
||||
TLS/SSL Security Scanner.
|
||||
|
||||
This module checks TLS/SSL configuration and certificate validity.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import socket
|
||||
import ssl
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Dict, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from .base import (
|
||||
BaseScanner,
|
||||
ScannerResult,
|
||||
ScannerStatus,
|
||||
IssueData,
|
||||
MetricData,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TLSScanner(BaseScanner):
|
||||
"""
|
||||
Scanner for TLS/SSL certificate and configuration.
|
||||
|
||||
Checks:
|
||||
- Certificate validity
|
||||
- Certificate expiration
|
||||
- HTTPS availability
|
||||
- HTTP to HTTPS redirect
|
||||
"""
|
||||
|
||||
name = "tls_check"
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||
super().__init__(config)
|
||||
self.timeout = self.config.get('timeout', 10)
|
||||
|
||||
def run(self, url: str) -> ScannerResult:
|
||||
"""
|
||||
Run TLS/SSL analysis on the URL.
|
||||
|
||||
Args:
|
||||
url: The URL to analyze
|
||||
|
||||
Returns:
|
||||
ScannerResult with TLS findings
|
||||
"""
|
||||
self.logger.info(f"Starting TLS scan for {url}")
|
||||
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
hostname = parsed.hostname or parsed.netloc.split(':')[0]
|
||||
port = parsed.port or (443 if parsed.scheme == 'https' else 80)
|
||||
|
||||
issues = []
|
||||
metrics = []
|
||||
raw_data = {}
|
||||
|
||||
# Check if site is HTTPS
|
||||
if parsed.scheme == 'http':
|
||||
# Check if HTTPS is available
|
||||
https_available, https_result = self._check_https_available(hostname)
|
||||
raw_data['https_available'] = https_available
|
||||
raw_data['https_check'] = https_result
|
||||
|
||||
if https_available:
|
||||
issues.append(IssueData(
|
||||
category='tls',
|
||||
severity='high',
|
||||
title='Site accessed over HTTP but HTTPS is available',
|
||||
description=(
|
||||
'The site was accessed over unencrypted HTTP, but HTTPS '
|
||||
'appears to be available. All traffic should use HTTPS.'
|
||||
),
|
||||
tool='tls_check',
|
||||
affected_url=url,
|
||||
remediation=(
|
||||
'Redirect all HTTP traffic to HTTPS using a 301 redirect. '
|
||||
'Implement HSTS to prevent future HTTP access.'
|
||||
)
|
||||
))
|
||||
else:
|
||||
issues.append(IssueData(
|
||||
category='tls',
|
||||
severity='critical',
|
||||
title='Site does not support HTTPS',
|
||||
description=(
|
||||
'The site does not appear to have HTTPS configured. '
|
||||
'All data transmitted is unencrypted and vulnerable to interception.'
|
||||
),
|
||||
tool='tls_check',
|
||||
affected_url=url,
|
||||
remediation=(
|
||||
'Configure TLS/SSL for your server. Obtain a certificate from '
|
||||
"Let's Encrypt (free) or a commercial CA."
|
||||
)
|
||||
))
|
||||
|
||||
metrics.append(MetricData(
|
||||
name='tls_enabled',
|
||||
display_name='TLS Enabled',
|
||||
value=0.0,
|
||||
unit='score',
|
||||
source='tls_check'
|
||||
))
|
||||
|
||||
return ScannerResult(
|
||||
scanner_name=self.name,
|
||||
status=ScannerStatus.SUCCESS,
|
||||
issues=issues,
|
||||
metrics=metrics,
|
||||
raw_data=raw_data
|
||||
)
|
||||
|
||||
# For HTTPS URLs, check certificate
|
||||
cert_info = self._get_certificate_info(hostname, port)
|
||||
raw_data['certificate'] = cert_info
|
||||
|
||||
if cert_info.get('error'):
|
||||
issues.append(IssueData(
|
||||
category='tls',
|
||||
severity='critical',
|
||||
title='Certificate validation failed',
|
||||
description=f"SSL certificate error: {cert_info['error']}",
|
||||
tool='tls_check',
|
||||
affected_url=url,
|
||||
remediation=(
|
||||
'Ensure your SSL certificate is valid, not expired, '
|
||||
'and properly configured for your domain.'
|
||||
)
|
||||
))
|
||||
|
||||
metrics.append(MetricData(
|
||||
name='certificate_valid',
|
||||
display_name='Certificate Valid',
|
||||
value=0.0,
|
||||
unit='score',
|
||||
source='tls_check'
|
||||
))
|
||||
else:
|
||||
# Certificate is valid
|
||||
metrics.append(MetricData(
|
||||
name='certificate_valid',
|
||||
display_name='Certificate Valid',
|
||||
value=1.0,
|
||||
unit='score',
|
||||
source='tls_check'
|
||||
))
|
||||
|
||||
metrics.append(MetricData(
|
||||
name='tls_enabled',
|
||||
display_name='TLS Enabled',
|
||||
value=1.0,
|
||||
unit='score',
|
||||
source='tls_check'
|
||||
))
|
||||
|
||||
# Check expiration
|
||||
if cert_info.get('expires'):
|
||||
try:
|
||||
expires = datetime.strptime(
|
||||
cert_info['expires'],
|
||||
'%b %d %H:%M:%S %Y %Z'
|
||||
)
|
||||
expires = expires.replace(tzinfo=timezone.utc)
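# notAfter strings look like 'Jun  1 12:00:00 2025 GMT';
# ssl.cert_time_to_seconds() is a stdlib alternative for this parse.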
|
||||
now = datetime.now(timezone.utc)
|
||||
days_until_expiry = (expires - now).days
|
||||
|
||||
metrics.append(MetricData(
|
||||
name='certificate_days_until_expiry',
|
||||
display_name='Days Until Certificate Expiry',
|
||||
value=float(days_until_expiry),
|
||||
unit='count',
|
||||
source='tls_check'
|
||||
))
|
||||
|
||||
if days_until_expiry <= 0:
|
||||
issues.append(IssueData(
|
||||
category='tls',
|
||||
severity='critical',
|
||||
title='SSL certificate has expired',
|
||||
description=(
|
||||
f"The SSL certificate expired on {cert_info['expires']}. "
|
||||
"Users will see security warnings."
|
||||
),
|
||||
tool='tls_check',
|
||||
affected_url=url,
|
||||
remediation='Renew your SSL certificate immediately.'
|
||||
))
|
||||
elif days_until_expiry <= 7:
|
||||
issues.append(IssueData(
|
||||
category='tls',
|
||||
severity='high',
|
||||
title='SSL certificate expiring very soon',
|
||||
description=(
|
||||
f"The SSL certificate will expire in {days_until_expiry} days "
|
||||
f"(on {cert_info['expires']}). Renew immediately."
|
||||
),
|
||||
tool='tls_check',
|
||||
affected_url=url,
|
||||
remediation='Renew your SSL certificate before it expires.'
|
||||
))
|
||||
elif days_until_expiry <= 30:
|
||||
issues.append(IssueData(
|
||||
category='tls',
|
||||
severity='medium',
|
||||
title='SSL certificate expiring soon',
|
||||
description=(
|
||||
f"The SSL certificate will expire in {days_until_expiry} days "
|
||||
f"(on {cert_info['expires']}). Plan for renewal."
|
||||
),
|
||||
tool='tls_check',
|
||||
affected_url=url,
|
||||
remediation=(
|
||||
'Renew your SSL certificate before expiration. '
|
||||
"Consider using auto-renewal with Let's Encrypt."
|
||||
)
|
||||
))
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Could not parse certificate expiry: {e}")
|
||||
|
||||
# Check certificate subject matches hostname
|
||||
if cert_info.get('subject'):
|
||||
subject_cn = dict(x[0] for x in cert_info['subject']).get('commonName', '')
|
||||
san = cert_info.get('subjectAltName', [])
|
||||
san_names = [name for type_, name in san if type_ == 'DNS']
|
||||
|
||||
hostname_matched = self._hostname_matches_cert(
|
||||
hostname, subject_cn, san_names
|
||||
)
|
||||
|
||||
if not hostname_matched:
|
||||
issues.append(IssueData(
|
||||
category='tls',
|
||||
severity='high',
|
||||
title='Certificate hostname mismatch',
|
||||
description=(
|
||||
f"The SSL certificate is for '{subject_cn}' but "
|
||||
f"the site is accessed as '{hostname}'."
|
||||
),
|
||||
tool='tls_check',
|
||||
affected_url=url,
|
||||
remediation=(
|
||||
'Obtain a certificate that includes your domain name, '
|
||||
'or add it to the Subject Alternative Names (SAN).'
|
||||
)
|
||||
))
|
||||
|
||||
# Check for HTTP to HTTPS redirect
|
||||
if parsed.scheme == 'https':
|
||||
redirect_info = self._check_http_redirect(hostname)
|
||||
raw_data['http_redirect'] = redirect_info
|
||||
|
||||
if not redirect_info.get('redirects_to_https'):
|
||||
issues.append(IssueData(
|
||||
category='tls',
|
||||
severity='medium',
|
||||
title='No HTTP to HTTPS redirect',
|
||||
description=(
|
||||
'The site does not redirect HTTP requests to HTTPS. '
|
||||
'Users accessing via HTTP will use an insecure connection.'
|
||||
),
|
||||
tool='tls_check',
|
||||
affected_url=f"http://{hostname}",
|
||||
remediation=(
|
||||
'Configure your server to redirect all HTTP (port 80) '
|
||||
'requests to HTTPS (port 443) with a 301 redirect.'
|
||||
)
|
||||
))
|
||||
|
||||
self.logger.info(f"TLS scan complete: {len(issues)} issues")
|
||||
|
||||
return ScannerResult(
|
||||
scanner_name=self.name,
|
||||
status=ScannerStatus.SUCCESS,
|
||||
issues=issues,
|
||||
metrics=metrics,
|
||||
raw_data=raw_data
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
return self._create_error_result(e)
|
||||
|
||||
def _check_https_available(self, hostname: str) -> tuple:
|
||||
"""Check if HTTPS is available for the hostname."""
|
||||
try:
|
||||
context = ssl.create_default_context()
|
||||
with socket.create_connection((hostname, 443), timeout=self.timeout) as sock:
|
||||
with context.wrap_socket(sock, server_hostname=hostname) as ssock:
|
||||
return True, {'available': True, 'protocol': ssock.version()}
|
||||
except ssl.SSLError as e:
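# An SSL handshake error still means something answered on port 443,
# so HTTPS is reported as available (just possibly misconfigured).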
|
||||
return True, {'available': True, 'error': str(e)}
|
||||
except Exception as e:
|
||||
return False, {'available': False, 'error': str(e)}
|
||||
|
||||
def _get_certificate_info(self, hostname: str, port: int = 443) -> Dict:
|
||||
"""Get SSL certificate information."""
|
||||
try:
|
||||
context = ssl.create_default_context()
|
||||
|
||||
with socket.create_connection((hostname, port), timeout=self.timeout) as sock:
|
||||
with context.wrap_socket(sock, server_hostname=hostname) as ssock:
|
||||
cert = ssock.getpeercert()
|
||||
|
||||
return {
|
||||
'subject': cert.get('subject'),
|
||||
'issuer': cert.get('issuer'),
|
||||
'version': cert.get('version'),
|
||||
'serialNumber': cert.get('serialNumber'),
|
||||
'notBefore': cert.get('notBefore'),
|
||||
'expires': cert.get('notAfter'),
|
||||
'subjectAltName': cert.get('subjectAltName', []),
|
||||
'protocol': ssock.version(),
|
||||
'cipher': ssock.cipher(),
|
||||
}
|
||||
except ssl.SSLCertVerificationError as e:
|
||||
return {'error': f"Certificate verification failed: {e.verify_message}"}
|
||||
except ssl.SSLError as e:
|
||||
return {'error': f"SSL error: {str(e)}"}
|
||||
except socket.timeout:
|
||||
return {'error': "Connection timed out"}
|
||||
except Exception as e:
|
||||
return {'error': str(e)}
|
||||
|
||||
def _hostname_matches_cert(
|
||||
self,
|
||||
hostname: str,
|
||||
cn: str,
|
||||
san_names: list
|
||||
) -> bool:
|
||||
"""Check if hostname matches certificate CN or SAN."""
|
||||
all_names = [cn] + san_names
|
||||
|
||||
for name in all_names:
|
||||
if name == hostname:
|
||||
return True
|
||||
# Handle wildcard certificates
|
||||
if name.startswith('*.'):
|
||||
domain = name[2:]
|
||||
if hostname.endswith('.' + domain):
|
||||
# Ensure wildcard only matches one level
|
||||
prefix = hostname[:-len(domain)-1]
|
||||
if '.' not in prefix:
|
||||
return True
|
||||
|
||||
return False
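# Examples: '*.example.com' matches 'www.example.com' but not
# 'a.b.example.com', 'example.com', or 'badexample.com'.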
|
||||
|
||||
def _check_http_redirect(self, hostname: str) -> Dict:
|
||||
"""Check if HTTP redirects to HTTPS."""
|
||||
import httpx
|
||||
|
||||
try:
|
||||
with httpx.Client(
|
||||
timeout=self.timeout,
|
||||
follow_redirects=False
|
||||
) as client:
|
||||
response = client.get(f"http://{hostname}")
|
||||
|
||||
if response.status_code in (301, 302, 303, 307, 308):
|
||||
location = response.headers.get('location', '')
|
||||
redirects_to_https = location.startswith('https://')
|
||||
return {
|
||||
'redirects_to_https': redirects_to_https,
|
||||
'status_code': response.status_code,
|
||||
'location': location,
|
||||
}
|
||||
else:
|
||||
return {
|
||||
'redirects_to_https': False,
|
||||
'status_code': response.status_code,
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
'redirects_to_https': False,
|
||||
'error': str(e),
|
||||
}
|
||||
|
|
@ -0,0 +1,307 @@
|
|||
"""
|
||||
OWASP ZAP Scanner Integration.
|
||||
|
||||
This module integrates with OWASP ZAP for security scanning,
|
||||
detecting vulnerabilities like XSS, injection flaws, and
|
||||
misconfigurations.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from .base import (
|
||||
BaseScanner,
|
||||
ScannerResult,
|
||||
ScannerStatus,
|
||||
IssueData,
|
||||
MetricData,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ZAPScanner(BaseScanner):
|
||||
"""
|
||||
Scanner using OWASP ZAP for security vulnerability detection.
|
||||
|
||||
Performs baseline scans to identify common security issues:
|
||||
- XSS vulnerabilities
|
||||
- SQL injection patterns
|
||||
- Insecure cookies
|
||||
- Missing security headers
|
||||
- SSL/TLS issues
|
||||
- And more...
|
||||
"""
|
||||
|
||||
name = "owasp_zap"
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||
super().__init__(config)
|
||||
scanner_config = settings.SCANNER_CONFIG
|
||||
self.zap_host = self.config.get('zap_host', scanner_config.get('ZAP_HOST', 'http://zap:8080'))
|
||||
self.api_key = self.config.get('api_key', scanner_config.get('ZAP_API_KEY', ''))
|
||||
self.timeout = self.config.get('timeout', scanner_config.get('ZAP_TIMEOUT', 120))
|
||||
|
||||
def is_available(self) -> bool:
|
||||
"""Check if ZAP service is available."""
|
||||
try:
|
||||
with httpx.Client(timeout=10) as client:
|
||||
response = client.get(
|
||||
f"{self.zap_host}/JSON/core/view/version/",
|
||||
params={'apikey': self.api_key}
|
||||
)
|
||||
return response.status_code == 200
|
||||
except Exception as e:
|
||||
self.logger.warning(f"ZAP service not available: {e}")
|
||||
return False
|
||||
|
||||
def run(self, url: str) -> ScannerResult:
|
||||
"""
|
||||
Run ZAP security scan against the URL.
|
||||
|
||||
Args:
|
||||
url: The URL to scan
|
||||
|
||||
Returns:
|
||||
ScannerResult with security findings
|
||||
"""
|
||||
self.logger.info(f"Starting ZAP scan for {url}")
|
||||
|
||||
try:
|
||||
# Access the target to populate ZAP's site tree
|
||||
self._access_url(url)
|
||||
|
||||
# Spider the site (limited crawl)
|
||||
self._spider_url(url)
|
||||
|
||||
# Run active scan
|
||||
self._active_scan(url)
|
||||
|
||||
# Get alerts
|
||||
alerts = self._get_alerts(url)
|
||||
|
||||
return self._parse_results(url, alerts)
|
||||
|
||||
except httpx.TimeoutException:
|
||||
return self._create_error_result(
|
||||
Exception("ZAP scan timed out")
|
||||
)
|
||||
except httpx.HTTPStatusError as e:
|
||||
return self._create_error_result(
|
||||
Exception(f"ZAP service error: {e.response.status_code}")
|
||||
)
|
||||
except Exception as e:
|
||||
return self._create_error_result(e)
|
||||
|
||||
def _zap_request(self, endpoint: str, params: Optional[Dict] = None) -> Dict:
|
||||
"""Make a request to the ZAP API."""
|
||||
if params is None:
|
||||
params = {}
|
||||
params['apikey'] = self.api_key
|
||||
|
||||
with httpx.Client(timeout=self.timeout) as client:
|
||||
response = client.get(
|
||||
f"{self.zap_host}{endpoint}",
|
||||
params=params
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.json()
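# e.g. _zap_request('/JSON/spider/view/status/', {'scanId': '1'}) issues
# GET http://zap:8080/JSON/spider/view/status/?scanId=1&apikey=... by default.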
|
||||
|
||||
def _access_url(self, url: str) -> None:
|
||||
"""Access the URL to add it to ZAP's site tree."""
|
||||
self.logger.debug(f"Accessing URL in ZAP: {url}")
|
||||
self._zap_request(
|
||||
'/JSON/core/action/accessUrl/',
|
||||
{'url': url, 'followRedirects': 'true'}
|
||||
)
|
||||
time.sleep(2) # Wait for ZAP to process
|
||||
|
||||
def _spider_url(self, url: str) -> None:
|
||||
"""Spider the URL to discover pages."""
|
||||
self.logger.debug(f"Spidering URL: {url}")
|
||||
|
||||
# Start spider
|
||||
result = self._zap_request(
|
||||
'/JSON/spider/action/scan/',
|
||||
{
|
||||
'url': url,
|
||||
'maxChildren': '5', # Limited crawl
|
||||
'recurse': 'true',
|
||||
'subtreeOnly': 'true'
|
||||
}
|
||||
)
|
||||
|
||||
scan_id = result.get('scan')
|
||||
if not scan_id:
|
||||
return
|
||||
|
||||
# Wait for spider to complete (with timeout)
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < 60: # 60 second spider timeout
|
||||
status = self._zap_request(
|
||||
'/JSON/spider/view/status/',
|
||||
{'scanId': scan_id}
|
||||
)
|
||||
if int(status.get('status', '100')) >= 100:
|
||||
break
|
||||
time.sleep(2)
|
||||
|
||||
def _active_scan(self, url: str) -> None:
|
||||
"""Run active scan against the URL."""
|
||||
self.logger.debug(f"Starting active scan: {url}")
|
||||
|
||||
# Start active scan
|
||||
result = self._zap_request(
|
||||
'/JSON/ascan/action/scan/',
|
||||
{
|
||||
'url': url,
|
||||
'recurse': 'true',
|
||||
'inScopeOnly': 'true'
|
||||
}
|
||||
)
|
||||
|
||||
scan_id = result.get('scan')
|
||||
if not scan_id:
|
||||
return
|
||||
|
||||
# Wait for scan to complete (with timeout)
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < self.timeout:
|
||||
status = self._zap_request(
|
||||
'/JSON/ascan/view/status/',
|
||||
{'scanId': scan_id}
|
||||
)
|
||||
if int(status.get('status', '100')) >= 100:
|
||||
break
|
||||
time.sleep(5)
|
||||
|
||||
def _get_alerts(self, url: str) -> List[Dict]:
|
||||
"""Get alerts for the scanned URL."""
|
||||
self.logger.debug(f"Fetching alerts for: {url}")
|
||||
|
||||
result = self._zap_request(
|
||||
'/JSON/core/view/alerts/',
|
||||
{
|
||||
'baseurl': url,
|
||||
'start': '0',
|
||||
'count': '100' # Limit alerts
|
||||
}
|
||||
)
|
||||
|
||||
return result.get('alerts', [])
|
||||
|
||||
def _parse_results(self, url: str, alerts: List[Dict]) -> ScannerResult:
|
||||
"""
|
||||
Parse ZAP alerts into ScannerResult format.
|
||||
|
||||
Args:
|
||||
url: The scanned URL
|
||||
alerts: List of ZAP alerts
|
||||
|
||||
Returns:
|
||||
Parsed ScannerResult
|
||||
"""
|
||||
issues = []
|
||||
metrics = []
|
||||
|
||||
# Count alerts by risk level
|
||||
risk_counts = {
|
||||
'High': 0,
|
||||
'Medium': 0,
|
||||
'Low': 0,
|
||||
'Informational': 0
|
||||
}
|
||||
|
||||
for alert in alerts:
|
||||
risk = alert.get('risk', 'Informational')
|
||||
risk_counts[risk] = risk_counts.get(risk, 0) + 1
|
||||
|
||||
severity = self._map_risk_to_severity(risk)
|
||||
|
||||
issues.append(IssueData(
|
||||
category='security',
|
||||
severity=severity,
|
||||
title=alert.get('name', 'Unknown vulnerability'),
|
||||
description=self._format_description(alert),
|
||||
tool='owasp_zap',
|
||||
affected_url=alert.get('url', url),
|
||||
remediation=alert.get('solution', 'Review and fix the vulnerability.'),
|
||||
raw_data={
|
||||
'alert_ref': alert.get('alertRef'),
|
||||
'cweid': alert.get('cweid'),
|
||||
'wascid': alert.get('wascid'),
|
||||
'confidence': alert.get('confidence'),
|
||||
'evidence': alert.get('evidence', '')[:500], # Truncate evidence
|
||||
}
|
||||
))
|
||||
|
||||
# Create metrics for vulnerability counts
|
||||
for risk_level, count in risk_counts.items():
|
||||
if count > 0:
|
||||
metrics.append(MetricData(
|
||||
name=f'zap_{risk_level.lower()}_alerts',
|
||||
display_name=f'{risk_level} Risk Alerts',
|
||||
value=float(count),
|
||||
unit='count',
|
||||
source='owasp_zap'
|
||||
))
|
||||
|
||||
metrics.append(MetricData(
|
||||
name='total_security_alerts',
|
||||
display_name='Total Security Alerts',
|
||||
value=float(len(alerts)),
|
||||
unit='count',
|
||||
source='owasp_zap'
|
||||
))
|
||||
|
||||
self.logger.info(
|
||||
f"ZAP scan complete: {len(alerts)} alerts "
|
||||
f"(High: {risk_counts['High']}, Medium: {risk_counts['Medium']}, "
|
||||
f"Low: {risk_counts['Low']})"
|
||||
)
|
||||
|
||||
return ScannerResult(
|
||||
scanner_name=self.name,
|
||||
status=ScannerStatus.SUCCESS,
|
||||
issues=issues,
|
||||
metrics=metrics,
|
||||
raw_data={
|
||||
'total_alerts': len(alerts),
|
||||
'risk_counts': risk_counts,
|
||||
'alerts': alerts[:50] # Store limited raw alerts
|
||||
}
|
||||
)
|
||||
|
||||
def _map_risk_to_severity(self, risk: str) -> str:
|
||||
"""Map ZAP risk level to our severity."""
|
||||
mapping = {
|
||||
'High': 'high',
|
||||
'Medium': 'medium',
|
||||
'Low': 'low',
|
||||
'Informational': 'info',
|
||||
}
|
||||
return mapping.get(risk, 'info')
|
||||
|
||||
def _format_description(self, alert: Dict) -> str:
|
||||
"""Format ZAP alert into readable description."""
|
||||
parts = []
|
||||
|
||||
if alert.get('description'):
|
||||
parts.append(alert['description'])
|
||||
|
||||
if alert.get('attack'):
|
||||
parts.append(f"\nAttack: {alert['attack']}")
|
||||
|
||||
if alert.get('evidence'):
|
||||
evidence = alert['evidence'][:200]
|
||||
parts.append(f"\nEvidence: {evidence}")
|
||||
|
||||
if alert.get('reference'):
|
||||
parts.append(f"\nReference: {alert['reference']}")
|
||||
|
||||
return '\n'.join(parts)
|
||||
|
|
@ -0,0 +1,306 @@
|
|||
"""
|
||||
Celery tasks for background scanning.
|
||||
|
||||
This module defines the Celery tasks that orchestrate website scans
|
||||
in the background.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import timedelta
|
||||
from typing import Optional
|
||||
|
||||
from celery import shared_task
|
||||
from celery.exceptions import SoftTimeLimitExceeded
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
|
||||
from websites.models import Website, Scan, ScanStatus, Issue, Metric
|
||||
from scanner.scanners import ScanRunner
|
||||
from scanner.utils import validate_url, get_domain_from_url
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@shared_task(
|
||||
bind=True,
|
||||
max_retries=2,
|
||||
default_retry_delay=60,
|
||||
soft_time_limit=300,
|
||||
time_limit=330,
|
||||
)
|
||||
def run_scan_task(self, scan_id: str) -> dict:
|
||||
"""
|
||||
Main Celery task for running a website scan.
|
||||
|
||||
This task:
|
||||
1. Updates scan status to running
|
||||
2. Orchestrates all scanners
|
||||
3. Saves results to database
|
||||
4. Handles errors and partial results
|
||||
|
||||
Args:
|
||||
scan_id: UUID of the Scan record
|
||||
|
||||
Returns:
|
||||
Dict with scan results summary
|
||||
"""
|
||||
logger.info(f"Starting scan task for scan_id: {scan_id}")
|
||||
|
||||
try:
|
||||
# Get the scan record
|
||||
scan = Scan.objects.select_related('website').get(id=scan_id)
|
||||
except Scan.DoesNotExist:
|
||||
logger.error(f"Scan {scan_id} not found")
|
||||
return {'error': f'Scan {scan_id} not found'}
|
||||
|
||||
# Update status to running
|
||||
scan.status = ScanStatus.RUNNING
|
||||
scan.started_at = timezone.now()
|
||||
scan.celery_task_id = self.request.id
|
||||
scan.save(update_fields=['status', 'started_at', 'celery_task_id'])
|
||||
|
||||
url = scan.website.url
|
||||
|
||||
try:
|
||||
# Run the scan pipeline
|
||||
runner = ScanRunner()
|
||||
results = runner.run(url)
|
||||
|
||||
# Save results to database
|
||||
_save_scan_results(scan, results)
|
||||
|
||||
# Update website last_scanned_at
|
||||
scan.website.last_scanned_at = timezone.now()
|
||||
scan.website.save(update_fields=['last_scanned_at'])
|
||||
|
||||
logger.info(f"Scan {scan_id} completed successfully")
|
||||
|
||||
return {
|
||||
'scan_id': str(scan_id),
|
||||
'status': scan.status,
|
||||
'overall_score': scan.overall_score,
|
||||
'issues_count': scan.issues.count(),
|
||||
'metrics_count': scan.metrics.count(),
|
||||
}
|
||||
|
||||
except SoftTimeLimitExceeded:
|
||||
logger.warning(f"Scan {scan_id} timed out")
|
||||
scan.status = ScanStatus.PARTIAL
|
||||
scan.error_message = "Scan timed out before completing all checks"
|
||||
scan.completed_at = timezone.now()
|
||||
scan.save(update_fields=['status', 'error_message', 'completed_at'])
|
||||
|
||||
return {
|
||||
'scan_id': str(scan_id),
|
||||
'status': 'partial',
|
||||
'error': 'Scan timed out'
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f"Scan {scan_id} failed with error: {e}")
|
||||
scan.status = ScanStatus.FAILED
|
||||
scan.error_message = str(e)
|
||||
scan.completed_at = timezone.now()
|
||||
scan.save(update_fields=['status', 'error_message', 'completed_at'])
|
||||
|
||||
# Retry on certain errors
|
||||
if self.request.retries < self.max_retries:
|
||||
raise self.retry(exc=e)
|
||||
|
||||
return {
|
||||
'scan_id': str(scan_id),
|
||||
'status': 'failed',
|
||||
'error': str(e)
|
||||
}
|
||||
|
||||
|
||||
def _save_scan_results(scan: Scan, results: dict) -> None:
|
||||
"""
|
||||
Save scan results to the database.
|
||||
|
||||
Args:
|
||||
scan: The Scan model instance
|
||||
results: Aggregated results from ScanRunner
|
||||
"""
|
||||
# Update scan status
|
||||
status_map = {
|
||||
'done': ScanStatus.DONE,
|
||||
'partial': ScanStatus.PARTIAL,
|
||||
'failed': ScanStatus.FAILED,
|
||||
}
|
||||
scan.status = status_map.get(results['status'], ScanStatus.DONE)
|
||||
scan.completed_at = timezone.now()
|
||||
|
||||
# Save scores
|
||||
scores = results.get('scores', {})
|
||||
scan.performance_score = scores.get('performance')
|
||||
scan.accessibility_score = scores.get('accessibility')
|
||||
scan.seo_score = scores.get('seo')
|
||||
scan.best_practices_score = scores.get('best_practices')
|
||||
|
||||
# Save raw data
|
||||
raw_data = results.get('raw_data', {})
|
||||
scan.raw_lighthouse_data = raw_data.get('lighthouse')
|
||||
scan.raw_zap_data = raw_data.get('owasp_zap')
|
||||
scan.raw_playwright_data = raw_data.get('playwright')
|
||||
scan.raw_headers_data = raw_data.get('header_check')
|
||||
|
||||
# Save errors if any
|
||||
if results.get('errors'):
|
||||
scan.error_message = '\n'.join(
|
||||
f"{e['scanner']}: {e['error']}"
|
||||
for e in results['errors']
|
||||
)
|
||||
|
||||
scan.save()
|
||||
|
||||
# Create Issue records
|
||||
issues_to_create = []
|
||||
for issue_data in results.get('issues', []):
|
||||
issues_to_create.append(Issue(
|
||||
scan=scan,
|
||||
category=issue_data['category'],
|
||||
severity=issue_data['severity'],
|
||||
title=issue_data['title'][:500], # Truncate if too long
|
||||
description=issue_data['description'],
|
||||
tool=issue_data['tool'],
|
||||
affected_url=issue_data.get('affected_url'),
|
||||
remediation=issue_data.get('remediation'),
|
||||
raw_data=issue_data.get('raw_data'),
|
||||
))
|
||||
|
||||
if issues_to_create:
|
||||
Issue.objects.bulk_create(issues_to_create)
|
||||
|
||||
# Create Metric records
|
||||
metrics_to_create = []
|
||||
seen_metrics = set() # Track unique metrics
|
||||
|
||||
for metric_data in results.get('metrics', []):
|
||||
metric_key = metric_data['name']
|
||||
if metric_key in seen_metrics:
|
||||
continue # Skip duplicates
|
||||
seen_metrics.add(metric_key)
|
||||
|
||||
# Map unit strings to model choices
|
||||
unit_map = {
|
||||
'ms': 'ms',
|
||||
'milliseconds': 'ms',
|
||||
's': 's',
|
||||
'seconds': 's',
|
||||
'bytes': 'bytes',
|
||||
'kb': 'kb',
|
||||
'kilobytes': 'kb',
|
||||
'mb': 'mb',
|
||||
'megabytes': 'mb',
|
||||
'score': 'score',
|
||||
'percent': 'percent',
|
||||
'count': 'count',
|
||||
}
|
||||
unit = unit_map.get(metric_data['unit'].lower(), 'count')
|
||||
|
||||
metrics_to_create.append(Metric(
|
||||
scan=scan,
|
||||
name=metric_data['name'],
|
||||
display_name=metric_data['display_name'][:200],
|
||||
value=metric_data['value'],
|
||||
unit=unit,
|
||||
source=metric_data['source'],
|
||||
score=metric_data.get('score'),
|
||||
))
|
||||
|
||||
if metrics_to_create:
|
||||
Metric.objects.bulk_create(metrics_to_create)
|
||||
|
||||
# Calculate security score based on issues
|
||||
scan.calculate_security_score()
|
||||
|
||||
# Calculate overall score
|
||||
scan.calculate_overall_score()
|
||||
|
||||
scan.save(update_fields=['security_score', 'overall_score'])
|
||||
|
||||
logger.info(
|
||||
f"Saved scan results: {len(issues_to_create)} issues, "
|
||||
f"{len(metrics_to_create)} metrics"
|
||||
)
|
||||
|
||||
|
||||
@shared_task
|
||||
def cleanup_old_scans(days: int = 30) -> dict:
|
||||
"""
|
||||
Clean up old scan data to prevent database growth.
|
||||
|
||||
Args:
|
||||
days: Number of days to keep scans
|
||||
|
||||
Returns:
|
||||
Dict with cleanup statistics
|
||||
"""
|
||||
cutoff_date = timezone.now() - timedelta(days=days)
|
||||
|
||||
# Delete old scans (cascades to issues and metrics)
|
||||
deleted_count, _ = Scan.objects.filter(
|
||||
created_at__lt=cutoff_date
|
||||
).delete()
|
||||
|
||||
logger.info(f"Cleaned up {deleted_count} old scans")
|
||||
|
||||
return {
|
||||
'deleted_scans': deleted_count,
|
||||
'cutoff_date': cutoff_date.isoformat(),
|
||||
}
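
# Illustrative Celery beat wiring for the cleanup task (a sketch; the dotted
# task path and schedule are assumptions and belong in the Celery/Django settings):
#
#   from celery.schedules import crontab
#   CELERY_BEAT_SCHEDULE = {
#       'cleanup-old-scans': {
#           'task': 'scanner.tasks.cleanup_old_scans',
#           'schedule': crontab(hour=3, minute=0),
#           'kwargs': {'days': 30},
#       },
#   }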
|
||||
|
||||
|
||||
def check_rate_limit(url: str) -> Optional[str]:
|
||||
"""
|
||||
Check if URL scanning is rate limited.
|
||||
|
||||
Args:
|
||||
url: The URL to check
|
||||
|
||||
Returns:
|
||||
Error message if rate limited, None otherwise
|
||||
"""
|
||||
from django.core.cache import cache
|
||||
|
||||
scanner_config = settings.SCANNER_CONFIG
|
||||
rate_limit_minutes = scanner_config.get('SCAN_RATE_LIMIT_MINUTES', 5)
|
||||
|
||||
# Create a cache key based on the URL
|
||||
domain = get_domain_from_url(url)
|
||||
cache_key = f"scan_rate_limit:{domain}"
|
||||
|
||||
# Check if already scanned recently
|
||||
last_scan_time = cache.get(cache_key)
|
||||
if last_scan_time:
|
||||
return (
|
||||
f"This URL was scanned recently. "
|
||||
f"Please wait {rate_limit_minutes} minutes between scans."
|
||||
)
|
||||
|
||||
# Set the rate limit
|
||||
cache.set(cache_key, timezone.now().isoformat(), timeout=rate_limit_minutes * 60)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def check_concurrent_scan_limit() -> Optional[str]:
|
||||
"""
|
||||
Check if maximum concurrent scans limit is reached.
|
||||
|
||||
Returns:
|
||||
Error message if limit reached, None otherwise
|
||||
"""
|
||||
scanner_config = settings.SCANNER_CONFIG
|
||||
max_concurrent = scanner_config.get('MAX_CONCURRENT_SCANS', 3)
|
||||
|
||||
running_count = Scan.objects.filter(status=ScanStatus.RUNNING).count()
|
||||
|
||||
if running_count >= max_concurrent:
|
||||
return (
|
||||
f"Maximum concurrent scans ({max_concurrent}) reached. "
|
||||
"Please wait for current scans to complete."
|
||||
)
|
||||
|
||||
return None
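
# Combined pre-flight check before enqueueing a scan (illustrative; JsonResponse
# and the surrounding view are assumptions, not part of this module):
#   error = check_rate_limit(url) or check_concurrent_scan_limit()
#   if error:
#       return JsonResponse({'error': error}, status=429)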
|
||||
|
|
@ -0,0 +1,185 @@
|
|||
"""
|
||||
URL validation and safety utilities.
|
||||
|
||||
This module provides functions for validating and normalizing URLs,
|
||||
including safety checks to prevent SSRF attacks.
|
||||
"""
|
||||
|
||||
import ipaddress
|
||||
import logging
|
||||
import socket
|
||||
from typing import Tuple
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
import validators
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def validate_url(url: str) -> Tuple[bool, str]:
|
||||
"""
|
||||
Validate and normalize a URL for scanning.
|
||||
|
||||
Args:
|
||||
url: The URL to validate
|
||||
|
||||
Returns:
|
||||
Tuple of (is_valid, normalized_url_or_error_message)
|
||||
"""
|
||||
if not url:
|
||||
return False, "URL is required"
|
||||
|
||||
# Basic URL validation
|
||||
if not validators.url(url):
|
||||
return False, "Invalid URL format"
|
||||
|
||||
# Parse the URL
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
except Exception as e:
|
||||
return False, f"Could not parse URL: {e}"
|
||||
|
||||
# Check scheme
|
||||
if parsed.scheme not in ('http', 'https'):
|
||||
return False, "URL must use http or https scheme"
|
||||
|
||||
# Check hostname
|
||||
hostname = parsed.netloc.split(':')[0].lower()
|
||||
|
||||
if not hostname:
|
||||
return False, "URL must have a valid hostname"
|
||||
|
||||
# Safety check: block localhost and private IPs
|
||||
is_safe, safety_error = check_url_safety(hostname)
|
||||
if not is_safe:
|
||||
return False, safety_error
|
||||
|
||||
# Normalize URL
|
||||
normalized = normalize_url(url)
|
||||
|
||||
return True, normalized
|
||||
|
||||
|
||||
def normalize_url(url: str) -> str:
|
||||
"""
|
||||
Normalize a URL to a canonical form.
|
||||
|
||||
- Lowercase hostname
|
||||
- Remove trailing slashes from path
|
||||
- Remove default ports
|
||||
    - Remove URL fragments
|
||||
|
||||
Args:
|
||||
url: The URL to normalize
|
||||
|
||||
Returns:
|
||||
Normalized URL string
|
||||
"""
|
||||
parsed = urlparse(url)
|
||||
|
||||
# Lowercase hostname
|
||||
hostname = parsed.netloc.lower()
|
||||
|
||||
# Remove default ports
|
||||
if ':80' in hostname and parsed.scheme == 'http':
|
||||
hostname = hostname.replace(':80', '')
|
||||
elif ':443' in hostname and parsed.scheme == 'https':
|
||||
hostname = hostname.replace(':443', '')
|
||||
|
||||
# Normalize path (remove trailing slash except for root)
|
||||
path = parsed.path
|
||||
if path != '/' and path.endswith('/'):
|
||||
path = path.rstrip('/')
|
||||
if not path:
|
||||
path = '/'
|
||||
|
||||
# Reconstruct URL
|
||||
normalized = urlunparse((
|
||||
parsed.scheme,
|
||||
hostname,
|
||||
path,
|
||||
parsed.params,
|
||||
parsed.query,
|
||||
'' # Remove fragment
|
||||
))
|
||||
|
||||
return normalized
|
||||
|
||||
|
||||
def check_url_safety(hostname: str) -> Tuple[bool, str]:
|
||||
"""
|
||||
Check if a hostname is safe to scan (not localhost/private IP).
|
||||
|
||||
Args:
|
||||
hostname: The hostname to check
|
||||
|
||||
Returns:
|
||||
Tuple of (is_safe, error_message_if_not_safe)
|
||||
"""
|
||||
scanner_config = settings.SCANNER_CONFIG
|
||||
blocked_hosts = scanner_config.get('BLOCKED_HOSTS', [])
|
||||
blocked_ranges = scanner_config.get('BLOCKED_IP_RANGES', [])
|
||||
|
||||
# Check blocked hostnames
|
||||
if hostname in blocked_hosts:
|
||||
return False, f"Scanning {hostname} is not allowed"
|
||||
|
||||
# Try to resolve hostname to IP
|
||||
try:
|
||||
ip_addresses = socket.getaddrinfo(
|
||||
hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM
|
||||
)
|
||||
except socket.gaierror:
|
||||
# Could not resolve - might be okay for some hostnames
|
||||
logger.warning(f"Could not resolve hostname: {hostname}")
|
||||
return True, ""
|
||||
|
||||
for family, type_, proto, canonname, sockaddr in ip_addresses:
|
||||
ip_str = sockaddr[0]
|
||||
|
||||
try:
|
||||
ip = ipaddress.ip_address(ip_str)
|
||||
|
||||
# Check if IP is in any blocked range
|
||||
for blocked_range in blocked_ranges:
|
||||
try:
|
||||
network = ipaddress.ip_network(blocked_range, strict=False)
|
||||
if ip in network:
|
||||
return False, f"Scanning private/local IP addresses is not allowed ({ip_str})"
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
# Additional checks
|
||||
if ip.is_private:
|
||||
return False, f"Scanning private IP addresses is not allowed ({ip_str})"
|
||||
|
||||
if ip.is_loopback:
|
||||
return False, f"Scanning localhost/loopback addresses is not allowed ({ip_str})"
|
||||
|
||||
if ip.is_link_local:
|
||||
return False, f"Scanning link-local addresses is not allowed ({ip_str})"
|
||||
|
||||
if ip.is_reserved:
|
||||
return False, f"Scanning reserved IP addresses is not allowed ({ip_str})"
|
||||
|
||||
except ValueError:
|
||||
# Not a valid IP address format
|
||||
continue
|
||||
|
||||
return True, ""
|
||||
|
||||
|
||||
def get_domain_from_url(url: str) -> str:
|
||||
"""
|
||||
Extract the domain from a URL.
|
||||
|
||||
Args:
|
||||
url: The URL to extract domain from
|
||||
|
||||
Returns:
|
||||
The domain/hostname
|
||||
"""
|
||||
parsed = urlparse(url)
|
||||
return parsed.netloc.split(':')[0].lower()
|
||||
|
|
@ -0,0 +1,89 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>{% block title %}Website Analyzer{% endblock %}</title>
|
||||
|
||||
<!-- Tailwind CSS -->
|
||||
<script src="https://cdn.tailwindcss.com"></script>
|
||||
|
||||
<!-- Alpine.js for interactivity -->
|
||||
<script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
|
||||
|
||||
<!-- Chart.js for visualizations -->
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||
|
||||
<style>
|
||||
[x-cloak] { display: none !important; }
|
||||
|
||||
/* Custom animations */
|
||||
@keyframes pulse-slow {
|
||||
0%, 100% { opacity: 1; }
|
||||
50% { opacity: 0.5; }
|
||||
}
|
||||
.animate-pulse-slow {
|
||||
animation: pulse-slow 2s cubic-bezier(0.4, 0, 0.6, 1) infinite;
|
||||
}
|
||||
|
||||
/* Score circle gradient */
|
||||
.score-circle {
|
||||
background: conic-gradient(
|
||||
var(--score-color) calc(var(--score) * 3.6deg),
|
||||
#e5e7eb calc(var(--score) * 3.6deg)
|
||||
);
|
||||
}
|
||||
</style>
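
    <!-- Example use of the score-circle helper (illustrative; real markup lives in
         child templates): <div class="score-circle" style="--score: 87; --score-color: #16a34a"></div> -->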
|
||||
|
||||
{% block extra_head %}{% endblock %}
|
||||
</head>
|
||||
<body class="bg-gray-50 min-h-screen">
|
||||
<!-- Navigation -->
|
||||
<nav class="bg-white shadow-sm border-b border-gray-200">
|
||||
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
|
||||
<div class="flex justify-between h-16">
|
||||
<div class="flex items-center">
|
||||
<a href="/" class="flex items-center space-x-2">
|
||||
<svg class="w-8 h-8 text-blue-600" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
|
||||
d="M9 12l2 2 4-4m5.618-4.016A11.955 11.955 0 0112 2.944a11.955 11.955 0 01-8.618 3.04A12.02 12.02 0 003 9c0 5.591 3.824 10.29 9 11.622 5.176-1.332 9-6.03 9-11.622 0-1.042-.133-2.052-.382-3.016z"/>
|
||||
</svg>
|
||||
<span class="font-bold text-xl text-gray-900">Website Analyzer</span>
|
||||
</a>
|
||||
</div>
|
||||
<div class="flex items-center space-x-4">
|
||||
<a href="/" class="text-gray-600 hover:text-gray-900 px-3 py-2 rounded-md text-sm font-medium">
|
||||
New Scan
|
||||
</a>
|
||||
<a href="/api/" class="text-gray-600 hover:text-gray-900 px-3 py-2 rounded-md text-sm font-medium">
|
||||
API
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<!-- Main Content -->
|
||||
<main class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8">
|
||||
{% block content %}{% endblock %}
|
||||
</main>
|
||||
|
||||
<!-- Footer -->
|
||||
<footer class="bg-white border-t border-gray-200 mt-auto">
|
||||
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-6">
|
||||
<div class="flex justify-between items-center">
|
||||
<p class="text-gray-500 text-sm">
|
||||
Website Analyzer - Security & Performance Scanner
|
||||
</p>
|
||||
<div class="flex space-x-4">
|
||||
<a href="/api/health/" class="text-gray-400 hover:text-gray-600 text-sm">
|
||||
Health Check
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
{% block extra_js %}{% endblock %}
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
"""
|
||||
Websites app initialization.
|
||||
"""
|
||||
|
||||
default_app_config = 'websites.apps.WebsitesConfig'
|
||||
|
|
@ -0,0 +1,93 @@
|
|||
"""
|
||||
Django admin configuration for Website Analyzer models.
|
||||
"""
|
||||
|
||||
from django.contrib import admin
|
||||
from .models import Website, Scan, Issue, Metric
|
||||
|
||||
|
||||
@admin.register(Website)
|
||||
class WebsiteAdmin(admin.ModelAdmin):
|
||||
list_display = ('url', 'domain', 'created_at', 'last_scanned_at')
|
||||
list_filter = ('created_at', 'last_scanned_at')
|
||||
search_fields = ('url', 'domain')
|
||||
readonly_fields = ('id', 'created_at', 'domain')
|
||||
ordering = ('-created_at',)
|
||||
|
||||
|
||||
class IssueInline(admin.TabularInline):
|
||||
model = Issue
|
||||
extra = 0
|
||||
readonly_fields = ('id', 'category', 'severity', 'tool', 'title', 'created_at')
|
||||
can_delete = False
|
||||
show_change_link = True
|
||||
max_num = 10
|
||||
|
||||
|
||||
class MetricInline(admin.TabularInline):
|
||||
model = Metric
|
||||
extra = 0
|
||||
readonly_fields = ('id', 'name', 'display_name', 'value', 'unit', 'source', 'score')
|
||||
can_delete = False
|
||||
max_num = 15
|
||||
|
||||
|
||||
@admin.register(Scan)
|
||||
class ScanAdmin(admin.ModelAdmin):
|
||||
list_display = (
|
||||
'id', 'website', 'status', 'overall_score',
|
||||
'performance_score', 'security_score', 'created_at'
|
||||
)
|
||||
list_filter = ('status', 'created_at')
|
||||
search_fields = ('website__url', 'website__domain')
|
||||
readonly_fields = (
|
||||
'id', 'created_at', 'started_at', 'completed_at',
|
||||
'celery_task_id', 'raw_lighthouse_data', 'raw_zap_data',
|
||||
'raw_playwright_data', 'raw_headers_data'
|
||||
)
|
||||
inlines = [IssueInline, MetricInline]
|
||||
ordering = ('-created_at',)
|
||||
|
||||
fieldsets = (
|
||||
('Basic Info', {
|
||||
'fields': ('id', 'website', 'status', 'celery_task_id')
|
||||
}),
|
||||
('Timestamps', {
|
||||
'fields': ('created_at', 'started_at', 'completed_at')
|
||||
}),
|
||||
('Scores', {
|
||||
'fields': (
|
||||
'overall_score', 'performance_score', 'accessibility_score',
|
||||
'seo_score', 'best_practices_score', 'security_score'
|
||||
)
|
||||
}),
|
||||
('Errors', {
|
||||
'fields': ('error_message',),
|
||||
'classes': ('collapse',)
|
||||
}),
|
||||
('Raw Data', {
|
||||
'fields': (
|
||||
'raw_lighthouse_data', 'raw_zap_data',
|
||||
'raw_playwright_data', 'raw_headers_data'
|
||||
),
|
||||
'classes': ('collapse',)
|
||||
}),
|
||||
)
|
||||
|
||||
|
||||
@admin.register(Issue)
|
||||
class IssueAdmin(admin.ModelAdmin):
|
||||
list_display = ('title', 'scan', 'category', 'severity', 'tool', 'created_at')
|
||||
list_filter = ('category', 'severity', 'tool', 'created_at')
|
||||
search_fields = ('title', 'description', 'scan__website__url')
|
||||
readonly_fields = ('id', 'created_at', 'raw_data')
|
||||
ordering = ('severity', '-created_at')
|
||||
|
||||
|
||||
@admin.register(Metric)
|
||||
class MetricAdmin(admin.ModelAdmin):
|
||||
list_display = ('display_name', 'scan', 'value', 'unit', 'source', 'score')
|
||||
list_filter = ('source', 'unit')
|
||||
search_fields = ('name', 'display_name', 'scan__website__url')
|
||||
readonly_fields = ('id', 'created_at')
|
||||
ordering = ('name',)
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
"""
|
||||
Websites app configuration.
|
||||
"""
|
||||
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class WebsitesConfig(AppConfig):
|
||||
default_auto_field = 'django.db.models.BigAutoField'
|
||||
name = 'websites'
|
||||
verbose_name = 'Website Scanner'
|
||||
|
|
@ -0,0 +1,493 @@
|
|||
"""
|
||||
Database models for Website Analyzer.
|
||||
|
||||
This module defines the core data models for storing websites, scans,
|
||||
issues, and metrics from various scanning tools.
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from django.db import models
|
||||
from django.utils import timezone
|
||||
from django.core.validators import URLValidator
|
||||
|
||||
|
||||
class Website(models.Model):
|
||||
"""
|
||||
Represents a website that has been scanned.
|
||||
|
||||
Each unique URL gets one Website record, which can have multiple
|
||||
Scan records associated with it.
|
||||
"""
|
||||
|
||||
id = models.UUIDField(
|
||||
primary_key=True,
|
||||
default=uuid.uuid4,
|
||||
editable=False,
|
||||
help_text="Unique identifier for the website"
|
||||
)
|
||||
url = models.URLField(
|
||||
max_length=2048,
|
||||
unique=True,
|
||||
validators=[URLValidator(schemes=['http', 'https'])],
|
||||
help_text="The normalized URL of the website"
|
||||
)
|
||||
domain = models.CharField(
|
||||
max_length=255,
|
||||
db_index=True,
|
||||
help_text="The domain extracted from the URL"
|
||||
)
|
||||
created_at = models.DateTimeField(
|
||||
auto_now_add=True,
|
||||
help_text="When the website was first added"
|
||||
)
|
||||
last_scanned_at = models.DateTimeField(
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="When the website was last scanned"
|
||||
)
|
||||
|
||||
class Meta:
|
||||
db_table = 'websites'
|
||||
ordering = ['-created_at']
|
||||
indexes = [
|
||||
models.Index(fields=['domain']),
|
||||
models.Index(fields=['-last_scanned_at']),
|
||||
]
|
||||
|
||||
def __str__(self):
|
||||
return self.url
|
||||
|
||||
def save(self, *args, **kwargs):
|
||||
"""Extract domain from URL before saving."""
|
||||
if self.url:
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(self.url)
|
||||
self.domain = parsed.netloc.lower()
|
||||
super().save(*args, **kwargs)
|
||||
|
||||
|
||||
class ScanStatus(models.TextChoices):
|
||||
"""Enumeration of possible scan statuses."""
|
||||
PENDING = 'pending', 'Pending'
|
||||
RUNNING = 'running', 'Running'
|
||||
DONE = 'done', 'Completed'
|
||||
FAILED = 'failed', 'Failed'
|
||||
PARTIAL = 'partial', 'Partially Completed'
|
||||
|
||||
|
||||
class Scan(models.Model):
|
||||
"""
|
||||
Represents a single scan of a website.
|
||||
|
||||
Contains aggregated scores from various scanning tools and
|
||||
links to detailed issues and metrics.
|
||||
"""
|
||||
|
||||
id = models.UUIDField(
|
||||
primary_key=True,
|
||||
default=uuid.uuid4,
|
||||
editable=False,
|
||||
help_text="Unique identifier for the scan"
|
||||
)
|
||||
website = models.ForeignKey(
|
||||
Website,
|
||||
on_delete=models.CASCADE,
|
||||
related_name='scans',
|
||||
help_text="The website that was scanned"
|
||||
)
|
||||
status = models.CharField(
|
||||
max_length=20,
|
||||
choices=ScanStatus.choices,
|
||||
default=ScanStatus.PENDING,
|
||||
db_index=True,
|
||||
help_text="Current status of the scan"
|
||||
)
|
||||
|
||||
# Celery task tracking
|
||||
celery_task_id = models.CharField(
|
||||
max_length=255,
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="Celery task ID for tracking"
|
||||
)
|
||||
|
||||
# Timestamps
|
||||
created_at = models.DateTimeField(
|
||||
auto_now_add=True,
|
||||
help_text="When the scan was created"
|
||||
)
|
||||
started_at = models.DateTimeField(
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="When the scan started running"
|
||||
)
|
||||
completed_at = models.DateTimeField(
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="When the scan completed"
|
||||
)
|
||||
|
||||
# Aggregated scores (0-100)
|
||||
performance_score = models.IntegerField(
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="Lighthouse performance score (0-100)"
|
||||
)
|
||||
accessibility_score = models.IntegerField(
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="Lighthouse accessibility score (0-100)"
|
||||
)
|
||||
seo_score = models.IntegerField(
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="Lighthouse SEO score (0-100)"
|
||||
)
|
||||
best_practices_score = models.IntegerField(
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="Lighthouse best practices score (0-100)"
|
||||
)
|
||||
security_score = models.IntegerField(
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="Computed security score based on issues (0-100)"
|
||||
)
|
||||
|
||||
# Overall health score (computed average)
|
||||
overall_score = models.IntegerField(
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="Overall health score (0-100)"
|
||||
)
|
||||
|
||||
# Error tracking
|
||||
error_message = models.TextField(
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="Error message if scan failed"
|
||||
)
|
||||
|
||||
# Raw data from scanners
|
||||
raw_lighthouse_data = models.JSONField(
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="Raw Lighthouse report data"
|
||||
)
|
||||
raw_zap_data = models.JSONField(
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="Raw OWASP ZAP report data"
|
||||
)
|
||||
raw_playwright_data = models.JSONField(
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="Raw Playwright analysis data"
|
||||
)
|
||||
raw_headers_data = models.JSONField(
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="Raw HTTP headers analysis data"
|
||||
)
|
||||
|
||||
class Meta:
|
||||
db_table = 'scans'
|
||||
ordering = ['-created_at']
|
||||
indexes = [
|
||||
models.Index(fields=['status']),
|
||||
models.Index(fields=['-created_at']),
|
||||
models.Index(fields=['website', '-created_at']),
|
||||
]
|
||||
|
||||
def __str__(self):
|
||||
return f"Scan {self.id} - {self.website.url} ({self.status})"
|
||||
|
||||
def calculate_overall_score(self):
|
||||
"""
|
||||
Calculate overall health score as weighted average of all scores.
|
||||
|
||||
Weights:
|
||||
- Performance: 25%
|
||||
- Security: 30%
|
||||
- Accessibility: 15%
|
||||
- SEO: 15%
|
||||
- Best Practices: 15%
|
||||
"""
|
||||
scores = [
|
||||
(self.performance_score, 0.25),
|
||||
(self.security_score, 0.30),
|
||||
(self.accessibility_score, 0.15),
|
||||
(self.seo_score, 0.15),
|
||||
(self.best_practices_score, 0.15),
|
||||
]
|
||||
|
||||
total_weight = 0
|
||||
weighted_sum = 0
|
||||
|
||||
for score, weight in scores:
|
||||
if score is not None:
|
||||
weighted_sum += score * weight
|
||||
total_weight += weight
|
||||
|
||||
if total_weight > 0:
|
||||
self.overall_score = round(weighted_sum / total_weight)
|
||||
else:
|
||||
self.overall_score = None
|
||||
|
||||
return self.overall_score
|
||||
|
||||
def calculate_security_score(self):
|
||||
"""
|
||||
Calculate security score based on security issues found.
|
||||
|
||||
Starts at 100 and deducts points based on issue severity:
|
||||
- Critical: -25 points each
|
||||
- High: -15 points each
|
||||
- Medium: -8 points each
|
||||
- Low: -3 points each
|
||||
- Info: -1 point each
|
||||
"""
|
||||
deductions = {
|
||||
'critical': 25,
|
||||
'high': 15,
|
||||
'medium': 8,
|
||||
'low': 3,
|
||||
'info': 1,
|
||||
}
|
||||
|
||||
score = 100
|
||||
security_issues = self.issues.filter(
|
||||
category__in=['security', 'headers', 'tls', 'cors']
|
||||
)
|
||||
|
||||
for issue in security_issues:
|
||||
score -= deductions.get(issue.severity, 0)
|
||||
|
||||
self.security_score = max(0, score)
|
||||
return self.security_score
|
||||
|
||||
|
||||
class IssueCategory(models.TextChoices):
|
||||
"""Categories of issues that can be detected."""
|
||||
PERFORMANCE = 'performance', 'Performance'
|
||||
SECURITY = 'security', 'Security'
|
||||
HEADERS = 'headers', 'HTTP Headers'
|
||||
TLS = 'tls', 'TLS/SSL'
|
||||
CORS = 'cors', 'CORS'
|
||||
ACCESSIBILITY = 'accessibility', 'Accessibility'
|
||||
SEO = 'seo', 'SEO'
|
||||
BEST_PRACTICES = 'best_practices', 'Best Practices'
|
||||
CONTENT = 'content', 'Content'
|
||||
RESOURCES = 'resources', 'Resources'
|
||||
|
||||
|
||||
class IssueSeverity(models.TextChoices):
|
||||
"""Severity levels for issues."""
|
||||
CRITICAL = 'critical', 'Critical'
|
||||
HIGH = 'high', 'High'
|
||||
MEDIUM = 'medium', 'Medium'
|
||||
LOW = 'low', 'Low'
|
||||
INFO = 'info', 'Informational'
|
||||
|
||||
|
||||
class ScannerTool(models.TextChoices):
|
||||
"""Scanner tools that can detect issues."""
|
||||
LIGHTHOUSE = 'lighthouse', 'Google Lighthouse'
|
||||
ZAP = 'owasp_zap', 'OWASP ZAP'
|
||||
PLAYWRIGHT = 'playwright', 'Playwright'
|
||||
HEADER_CHECK = 'header_check', 'HTTP Header Check'
|
||||
TLS_CHECK = 'tls_check', 'TLS/SSL Check'
|
||||
|
||||
|
||||
class Issue(models.Model):
|
||||
"""
|
||||
Represents a specific issue found during a scan.
|
||||
|
||||
Issues are categorized by type, severity, and the tool that detected them.
|
||||
Each issue includes a description and suggested remediation.
|
||||
"""
|
||||
|
||||
id = models.UUIDField(
|
||||
primary_key=True,
|
||||
default=uuid.uuid4,
|
||||
editable=False
|
||||
)
|
||||
scan = models.ForeignKey(
|
||||
Scan,
|
||||
on_delete=models.CASCADE,
|
||||
related_name='issues',
|
||||
help_text="The scan that found this issue"
|
||||
)
|
||||
|
||||
# Classification
|
||||
category = models.CharField(
|
||||
max_length=30,
|
||||
choices=IssueCategory.choices,
|
||||
db_index=True,
|
||||
help_text="Category of the issue"
|
||||
)
|
||||
severity = models.CharField(
|
||||
max_length=20,
|
||||
choices=IssueSeverity.choices,
|
||||
db_index=True,
|
||||
help_text="Severity level of the issue"
|
||||
)
|
||||
tool = models.CharField(
|
||||
max_length=30,
|
||||
choices=ScannerTool.choices,
|
||||
help_text="Tool that detected this issue"
|
||||
)
|
||||
|
||||
# Issue details
|
||||
title = models.CharField(
|
||||
max_length=500,
|
||||
help_text="Brief title of the issue"
|
||||
)
|
||||
description = models.TextField(
|
||||
help_text="Detailed description of the issue"
|
||||
)
|
||||
affected_url = models.URLField(
|
||||
max_length=2048,
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="Specific URL affected by this issue"
|
||||
)
|
||||
remediation = models.TextField(
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="Suggested fix or remediation"
|
||||
)
|
||||
|
||||
# Additional data from scanner
|
||||
raw_data = models.JSONField(
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="Raw data from the scanner for this issue"
|
||||
)
|
||||
|
||||
# Timestamps
|
||||
created_at = models.DateTimeField(
|
||||
auto_now_add=True
|
||||
)
|
||||
|
||||
class Meta:
|
||||
db_table = 'issues'
|
||||
ordering = ['severity', '-created_at']
|
||||
indexes = [
|
||||
models.Index(fields=['scan', 'category']),
|
||||
models.Index(fields=['scan', 'severity']),
|
||||
models.Index(fields=['tool']),
|
||||
]
|
||||
|
||||
def __str__(self):
|
||||
return f"[{self.severity}] {self.title}"
|
||||
|
||||
|
||||
class MetricUnit(models.TextChoices):
|
||||
"""Units of measurement for metrics."""
|
||||
MILLISECONDS = 'ms', 'Milliseconds'
|
||||
SECONDS = 's', 'Seconds'
|
||||
BYTES = 'bytes', 'Bytes'
|
||||
KILOBYTES = 'kb', 'Kilobytes'
|
||||
MEGABYTES = 'mb', 'Megabytes'
|
||||
SCORE = 'score', 'Score (0-1)'
|
||||
PERCENT = 'percent', 'Percentage'
|
||||
COUNT = 'count', 'Count'
|
||||
|
||||
|
||||
class Metric(models.Model):
|
||||
"""
|
||||
Represents a specific metric measured during a scan.
|
||||
|
||||
Metrics are numerical values with units, such as page load time,
|
||||
total byte weight, number of requests, etc.
|
||||
"""
|
||||
|
||||
id = models.UUIDField(
|
||||
primary_key=True,
|
||||
default=uuid.uuid4,
|
||||
editable=False
|
||||
)
|
||||
scan = models.ForeignKey(
|
||||
Scan,
|
||||
on_delete=models.CASCADE,
|
||||
related_name='metrics',
|
||||
help_text="The scan that measured this metric"
|
||||
)
|
||||
|
||||
# Metric identification
|
||||
name = models.CharField(
|
||||
max_length=100,
|
||||
db_index=True,
|
||||
help_text="Name of the metric (e.g., 'first_contentful_paint_ms')"
|
||||
)
|
||||
display_name = models.CharField(
|
||||
max_length=200,
|
||||
help_text="Human-readable name for display"
|
||||
)
|
||||
|
||||
# Value
|
||||
value = models.FloatField(
|
||||
help_text="Numeric value of the metric"
|
||||
)
|
||||
unit = models.CharField(
|
||||
max_length=20,
|
||||
choices=MetricUnit.choices,
|
||||
help_text="Unit of measurement"
|
||||
)
|
||||
|
||||
# Source
|
||||
source = models.CharField(
|
||||
max_length=30,
|
||||
choices=ScannerTool.choices,
|
||||
help_text="Tool that provided this metric"
|
||||
)
|
||||
|
||||
# Score (if applicable)
|
||||
score = models.FloatField(
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="Lighthouse score for this metric (0-1)"
|
||||
)
|
||||
|
||||
# Timestamp
|
||||
created_at = models.DateTimeField(
|
||||
auto_now_add=True
|
||||
)
|
||||
|
||||
class Meta:
|
||||
db_table = 'metrics'
|
||||
ordering = ['name']
|
||||
indexes = [
|
||||
models.Index(fields=['scan', 'name']),
|
||||
models.Index(fields=['source']),
|
||||
]
|
||||
# Ensure unique metric names per scan
|
||||
constraints = [
|
||||
models.UniqueConstraint(
|
||||
fields=['scan', 'name'],
|
||||
name='unique_metric_per_scan'
|
||||
)
|
||||
]
|
||||
|
||||
def __str__(self):
|
||||
return f"{self.display_name}: {self.value} {self.unit}"
|
||||
|
||||
def get_formatted_value(self):
|
||||
"""Return a formatted string representation of the value."""
|
||||
if self.unit == MetricUnit.MILLISECONDS:
|
||||
if self.value >= 1000:
|
||||
return f"{self.value / 1000:.2f}s"
|
||||
return f"{self.value:.0f}ms"
|
||||
elif self.unit == MetricUnit.BYTES:
|
||||
if self.value >= 1024 * 1024:
|
||||
return f"{self.value / (1024 * 1024):.2f} MB"
|
||||
elif self.value >= 1024:
|
||||
return f"{self.value / 1024:.1f} KB"
|
||||
return f"{self.value:.0f} bytes"
|
||||
elif self.unit == MetricUnit.PERCENT:
|
||||
return f"{self.value:.1f}%"
|
||||
elif self.unit == MetricUnit.SCORE:
|
||||
return f"{self.value:.3f}"
|
||||
else:
|
||||
return f"{self.value:.2f} {self.get_unit_display()}"
|
||||
|
|
@ -0,0 +1,160 @@
|
|||
# Website Analyzer - Docker Compose Configuration
|
||||
# This file orchestrates all services required for the application
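#
# Typical usage from the repository root:
#   docker compose up -d --build
#   docker compose logs -f web celery_worker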
|
||||
|
||||
version: '3.9'
|
||||
|
||||
services:
|
||||
# ==========================================================================
|
||||
# PostgreSQL Database
|
||||
# ==========================================================================
|
||||
db:
|
||||
image: postgres:16-alpine
|
||||
container_name: analyzer_db
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
POSTGRES_USER: analyzer
|
||||
POSTGRES_PASSWORD: analyzer_password
|
||||
POSTGRES_DB: website_analyzer
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
ports:
|
||||
- "5432:5432"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U analyzer -d website_analyzer"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ==========================================================================
|
||||
# Redis - Message Broker & Cache
|
||||
# ==========================================================================
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
container_name: analyzer_redis
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "6379:6379"
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ==========================================================================
|
||||
# Django Web Application
|
||||
# ==========================================================================
|
||||
web:
|
||||
build:
|
||||
context: ./backend
|
||||
dockerfile: Dockerfile
|
||||
container_name: analyzer_web
|
||||
restart: unless-stopped
|
||||
command: >
|
||||
sh -c "python manage.py migrate &&
|
||||
python manage.py collectstatic --noinput &&
|
||||
gunicorn core.wsgi:application --bind 0.0.0.0:8000 --workers 4 --threads 2"
|
||||
volumes:
|
||||
- ./backend:/app
|
||||
- static_volume:/app/staticfiles
|
||||
ports:
|
||||
- "8000:8000"
|
||||
env_file:
|
||||
- ./backend/.env
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8000/api/health/"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
|
||||
# ==========================================================================
|
||||
# Celery Worker - Background Task Processing
|
||||
# ==========================================================================
|
||||
celery_worker:
|
||||
build:
|
||||
context: ./backend
|
||||
dockerfile: Dockerfile
|
||||
container_name: analyzer_celery_worker
|
||||
restart: unless-stopped
|
||||
command: celery -A core worker -l INFO --concurrency=2
|
||||
volumes:
|
||||
- ./backend:/app
|
||||
env_file:
|
||||
- ./backend/.env
|
||||
depends_on:
|
||||
- db
|
||||
- redis
|
||||
- web
|
||||
|
||||
# ==========================================================================
|
||||
# Celery Beat - Scheduled Tasks (Optional)
|
||||
# ==========================================================================
|
||||
celery_beat:
|
||||
build:
|
||||
context: ./backend
|
||||
dockerfile: Dockerfile
|
||||
container_name: analyzer_celery_beat
|
||||
restart: unless-stopped
|
||||
command: celery -A core beat -l INFO
|
||||
volumes:
|
||||
- ./backend:/app
|
||||
env_file:
|
||||
- ./backend/.env
|
||||
depends_on:
|
||||
- db
|
||||
- redis
|
||||
- celery_worker
|
||||
|
||||
# ==========================================================================
|
||||
# OWASP ZAP - Security Scanner
|
||||
# ==========================================================================
|
||||
zap:
|
||||
image: ghcr.io/zaproxy/zaproxy:stable
|
||||
container_name: analyzer_zap
|
||||
restart: unless-stopped
|
||||
command: zap.sh -daemon -host 0.0.0.0 -port 8080 -config api.key=zap-api-key-change-me -config api.addrs.addr.name=.* -config api.addrs.addr.regex=true
|
||||
ports:
|
||||
- "8081:8080"
|
||||
volumes:
|
||||
- zap_data:/home/zap/.ZAP
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8080/JSON/core/view/version/?apikey=zap-api-key-change-me"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 5
|
||||
|
||||
# ==========================================================================
|
||||
# Lighthouse Scanner Service (Node.js)
|
||||
# ==========================================================================
|
||||
lighthouse:
|
||||
build:
|
||||
context: ./lighthouse
|
||||
dockerfile: Dockerfile
|
||||
container_name: analyzer_lighthouse
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "3001:3001"
|
||||
volumes:
|
||||
- lighthouse_reports:/app/reports
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:3001/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
redis_data:
|
||||
static_volume:
|
||||
zap_data:
|
||||
lighthouse_reports:
|
||||
|
||||
networks:
|
||||
default:
|
||||
name: analyzer_network
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
# Lighthouse Scanner Service - Dockerfile
|
||||
# Node.js service that runs Lighthouse CLI and provides HTTP API
|
||||
|
||||
FROM node:20-slim
|
||||
|
||||
# Install Chrome dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
chromium \
|
||||
fonts-liberation \
|
||||
    libayatana-appindicator3-1 \
|
||||
libasound2 \
|
||||
libatk-bridge2.0-0 \
|
||||
libatk1.0-0 \
|
||||
libcups2 \
|
||||
libdbus-1-3 \
|
||||
libdrm2 \
|
||||
libgbm1 \
|
||||
libgtk-3-0 \
|
||||
libnspr4 \
|
||||
libnss3 \
|
||||
libxcomposite1 \
|
||||
libxdamage1 \
|
||||
libxfixes3 \
|
||||
libxkbcommon0 \
|
||||
libxrandr2 \
|
||||
xdg-utils \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Set Chrome path for Lighthouse
|
||||
ENV CHROME_PATH=/usr/bin/chromium
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy package files
|
||||
COPY package*.json ./
|
||||
|
||||
# Install dependencies
|
||||
RUN npm ci --only=production
|
||||
|
||||
# Copy application code
|
||||
COPY . .
|
||||
|
||||
# Create reports directory
|
||||
RUN mkdir -p reports
|
||||
|
||||
# Create non-root user
|
||||
RUN useradd -m -u 1000 lighthouse && \
|
||||
chown -R lighthouse:lighthouse /app
|
||||
USER lighthouse
|
||||
|
||||
EXPOSE 3001
|
||||
|
||||
CMD ["node", "server.js"]
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
{
|
||||
"name": "lighthouse-scanner",
|
||||
"version": "1.0.0",
|
||||
"description": "Lighthouse scanner service for Website Analyzer",
|
||||
"main": "server.js",
|
||||
"scripts": {
|
||||
"start": "node server.js",
|
||||
"dev": "node --watch server.js"
|
||||
},
|
||||
"dependencies": {
|
||||
"express": "^4.18.2",
|
||||
"lighthouse": "^11.4.0",
|
||||
"chrome-launcher": "^1.1.0",
|
||||
"uuid": "^9.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,328 @@
|
|||
/**
|
||||
* Lighthouse Scanner Service
|
||||
*
|
||||
* This service provides an HTTP API for running Lighthouse audits.
|
||||
* It's designed to be called from the Django backend via Celery tasks.
|
||||
*/
|
||||
|
||||
const express = require('express');
|
||||
// NOTE: lighthouse and chrome-launcher ship as ES modules in the versions pinned
// in package.json, so they are loaded with dynamic import() inside the /scan
// handler below rather than require()d here.
|
||||
const { v4: uuidv4 } = require('uuid');
|
||||
const fs = require('fs').promises;
|
||||
const path = require('path');
|
||||
|
||||
const app = express();
|
||||
app.use(express.json());
|
||||
|
||||
const PORT = process.env.PORT || 3001;
|
||||
const REPORTS_DIR = path.join(__dirname, 'reports');
|
||||
|
||||
// Ensure reports directory exists
|
||||
fs.mkdir(REPORTS_DIR, { recursive: true }).catch(console.error);
|
||||
|
||||
/**
|
||||
* Health check endpoint
|
||||
*/
|
||||
app.get('/health', (req, res) => {
|
||||
res.json({ status: 'healthy', service: 'lighthouse-scanner' });
|
||||
});
|
||||
|
||||
/**
|
||||
* Run Lighthouse audit for a given URL
|
||||
*
|
||||
* POST /scan
|
||||
* Body: { "url": "https://example.com" }
|
||||
*
|
||||
* Returns: Lighthouse audit results as JSON
|
||||
*/
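// Example request (illustrative):
//   curl -X POST http://localhost:3001/scan \
//     -H 'Content-Type: application/json' \
//     -d '{"url": "https://example.com"}'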
|
||||
app.post('/scan', async (req, res) => {
|
||||
const { url } = req.body;
|
||||
|
||||
if (!url) {
|
||||
return res.status(400).json({ error: 'URL is required' });
|
||||
}
|
||||
|
||||
// Validate URL format
|
||||
try {
|
||||
new URL(url);
|
||||
} catch (e) {
|
||||
return res.status(400).json({ error: 'Invalid URL format' });
|
||||
}
|
||||
|
||||
const scanId = uuidv4();
|
||||
console.log(`[${scanId}] Starting Lighthouse scan for: ${url}`);
|
||||
|
||||
let chrome = null;
|
||||
|
||||
try {
|
||||
    // The pinned lighthouse and chrome-launcher releases are ESM-only, so load
    // them with dynamic import() from this CommonJS module
    const { default: lighthouse } = await import('lighthouse');
    const chromeLauncher = await import('chrome-launcher');

    // Launch Chrome
    chrome = await chromeLauncher.launch({
|
||||
chromeFlags: [
|
||||
'--headless',
|
||||
'--disable-gpu',
|
||||
'--no-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
'--disable-extensions',
|
||||
'--disable-background-networking',
|
||||
'--disable-sync',
|
||||
'--disable-translate',
|
||||
'--metrics-recording-only',
|
||||
'--mute-audio',
|
||||
'--no-first-run',
|
||||
'--safebrowsing-disable-auto-update'
|
||||
]
|
||||
});
|
||||
|
||||
console.log(`[${scanId}] Chrome launched on port ${chrome.port}`);
|
||||
|
||||
// Lighthouse configuration
|
||||
const options = {
|
||||
logLevel: 'error',
|
||||
output: 'json',
|
||||
port: chrome.port,
|
||||
onlyCategories: ['performance', 'accessibility', 'best-practices', 'seo'],
|
||||
// Throttling settings for more realistic results
|
||||
throttling: {
|
||||
cpuSlowdownMultiplier: 4,
|
||||
downloadThroughputKbps: 1638.4,
|
||||
uploadThroughputKbps: 675,
|
||||
rttMs: 150
|
||||
},
|
||||
screenEmulation: {
|
||||
mobile: false,
|
||||
width: 1920,
|
||||
height: 1080,
|
||||
deviceScaleFactor: 1,
|
||||
disabled: false
|
||||
},
|
||||
formFactor: 'desktop'
|
||||
};
|
||||
|
||||
// Run Lighthouse
|
||||
const runnerResult = await lighthouse(url, options);
|
||||
|
||||
// Extract the report
|
||||
const report = runnerResult.lhr;
|
||||
|
||||
// Process and extract key metrics
|
||||
const result = {
|
||||
scanId,
|
||||
      url: report.finalDisplayedUrl || report.finalUrl || url,  // finalUrl was replaced in newer Lighthouse releases
|
||||
fetchTime: report.fetchTime,
|
||||
|
||||
// Category scores (0-100)
|
||||
scores: {
|
||||
performance: Math.round((report.categories.performance?.score || 0) * 100),
|
||||
accessibility: Math.round((report.categories.accessibility?.score || 0) * 100),
|
||||
bestPractices: Math.round((report.categories['best-practices']?.score || 0) * 100),
|
||||
seo: Math.round((report.categories.seo?.score || 0) * 100)
|
||||
},
|
||||
|
||||
// Core Web Vitals and key metrics
|
||||
metrics: {
|
||||
firstContentfulPaint: {
|
||||
value: report.audits['first-contentful-paint']?.numericValue || null,
|
||||
unit: 'ms',
|
||||
score: report.audits['first-contentful-paint']?.score || null
|
||||
},
|
||||
largestContentfulPaint: {
|
||||
value: report.audits['largest-contentful-paint']?.numericValue || null,
|
||||
unit: 'ms',
|
||||
score: report.audits['largest-contentful-paint']?.score || null
|
||||
},
|
||||
speedIndex: {
|
||||
value: report.audits['speed-index']?.numericValue || null,
|
||||
unit: 'ms',
|
||||
score: report.audits['speed-index']?.score || null
|
||||
},
|
||||
timeToInteractive: {
|
||||
value: report.audits['interactive']?.numericValue || null,
|
||||
unit: 'ms',
|
||||
score: report.audits['interactive']?.score || null
|
||||
},
|
||||
totalBlockingTime: {
|
||||
value: report.audits['total-blocking-time']?.numericValue || null,
|
||||
unit: 'ms',
|
||||
score: report.audits['total-blocking-time']?.score || null
|
||||
},
|
||||
cumulativeLayoutShift: {
|
||||
value: report.audits['cumulative-layout-shift']?.numericValue || null,
|
||||
unit: 'score',
|
||||
score: report.audits['cumulative-layout-shift']?.score || null
|
||||
}
|
||||
},
|
||||
|
||||
// JavaScript and resource audits
|
||||
resources: {
|
||||
totalByteWeight: report.audits['total-byte-weight']?.numericValue || null,
|
||||
bootupTime: report.audits['bootup-time']?.numericValue || null,
|
||||
mainThreadWork: report.audits['mainthread-work-breakdown']?.numericValue || null,
|
||||
|
||||
// Unused resources
|
||||
unusedJavascript: extractUnusedResources(report.audits['unused-javascript']),
|
||||
unusedCss: extractUnusedResources(report.audits['unused-css-rules']),
|
||||
|
||||
// Render blocking resources
|
||||
renderBlockingResources: extractRenderBlockingResources(report.audits['render-blocking-resources']),
|
||||
|
||||
// Large bundles
|
||||
scriptTreemap: extractLargeScripts(report.audits['script-treemap-data']),
|
||||
|
||||
// Third party usage
|
||||
thirdPartySummary: extractThirdPartySummary(report.audits['third-party-summary'])
|
||||
},
|
||||
|
||||
// Diagnostics
|
||||
diagnostics: {
|
||||
numRequests: report.audits['network-requests']?.details?.items?.length || 0,
|
||||
numScripts: countResourcesByType(report.audits['network-requests'], 'Script'),
|
||||
numStylesheets: countResourcesByType(report.audits['network-requests'], 'Stylesheet'),
|
||||
numImages: countResourcesByType(report.audits['network-requests'], 'Image'),
|
||||
numFonts: countResourcesByType(report.audits['network-requests'], 'Font'),
|
||||
totalTransferSize: report.audits['total-byte-weight']?.numericValue || 0
|
||||
},
|
||||
|
||||
// Failed audits (potential issues)
|
||||
issues: extractFailedAudits(report)
|
||||
};
|
||||
|
||||
// Save full report to file for debugging
|
||||
const reportPath = path.join(REPORTS_DIR, `${scanId}.json`);
|
||||
await fs.writeFile(reportPath, JSON.stringify(report, null, 2));
|
||||
|
||||
console.log(`[${scanId}] Scan completed successfully`);
|
||||
res.json(result);
|
||||
|
||||
} catch (error) {
|
||||
console.error(`[${scanId}] Scan failed:`, error);
|
||||
res.status(500).json({
|
||||
error: 'Lighthouse scan failed',
|
||||
message: error.message,
|
||||
scanId
|
||||
});
|
||||
} finally {
|
||||
if (chrome) {
|
||||
await chrome.kill();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* Get a saved report by ID
|
||||
*/
|
||||
app.get('/report/:scanId', async (req, res) => {
|
||||
const { scanId } = req.params;
|
||||
const reportPath = path.join(REPORTS_DIR, `${scanId}.json`);
|
||||
|
||||
try {
|
||||
const report = await fs.readFile(reportPath, 'utf8');
|
||||
res.json(JSON.parse(report));
|
||||
} catch (error) {
|
||||
res.status(404).json({ error: 'Report not found' });
|
||||
}
|
||||
});
|
||||
|
||||
// =============================================================================
|
||||
// Helper Functions
|
||||
// =============================================================================
|
||||
|
||||
function extractUnusedResources(audit) {
|
||||
if (!audit?.details?.items) return [];
|
||||
|
||||
return audit.details.items.slice(0, 10).map(item => ({
|
||||
url: item.url,
|
||||
totalBytes: item.totalBytes,
|
||||
wastedBytes: item.wastedBytes,
|
||||
wastedPercent: item.wastedPercent
|
||||
}));
|
||||
}
|
||||
|
||||
function extractRenderBlockingResources(audit) {
|
||||
if (!audit?.details?.items) return [];
|
||||
|
||||
return audit.details.items.map(item => ({
|
||||
url: item.url,
|
||||
wastedMs: item.wastedMs,
|
||||
totalBytes: item.totalBytes
|
||||
}));
|
||||
}
|
||||
|
||||
function extractLargeScripts(audit) {
|
||||
if (!audit?.details?.nodes) return [];
|
||||
|
||||
// Get scripts larger than 100KB
|
||||
const largeScripts = [];
|
||||
const processNode = (node, path = '') => {
|
||||
const currentPath = path ? `${path}/${node.name}` : node.name;
|
||||
|
||||
if (node.resourceBytes > 100 * 1024) {
|
||||
largeScripts.push({
|
||||
name: currentPath,
|
||||
resourceBytes: node.resourceBytes,
|
||||
unusedBytes: node.unusedBytes || 0
|
||||
});
|
||||
}
|
||||
|
||||
if (node.children) {
|
||||
node.children.forEach(child => processNode(child, currentPath));
|
||||
}
|
||||
};
|
||||
|
||||
audit.details.nodes.forEach(node => processNode(node));
|
||||
return largeScripts.slice(0, 20);
|
||||
}
|
||||
|
||||
function extractThirdPartySummary(audit) {
|
||||
if (!audit?.details?.items) return [];
|
||||
|
||||
return audit.details.items.slice(0, 10).map(item => ({
|
||||
entity: item.entity,
|
||||
transferSize: item.transferSize,
|
||||
blockingTime: item.blockingTime,
|
||||
mainThreadTime: item.mainThreadTime
|
||||
}));
|
||||
}
|
||||
|
||||
function countResourcesByType(audit, type) {
|
||||
if (!audit?.details?.items) return 0;
|
||||
return audit.details.items.filter(item => item.resourceType === type).length;
|
||||
}
|
||||
|
||||
function extractFailedAudits(report) {
|
||||
const issues = [];
|
||||
|
||||
const categoriesToCheck = ['performance', 'accessibility', 'best-practices', 'seo'];
|
||||
|
||||
categoriesToCheck.forEach(categoryId => {
|
||||
const category = report.categories[categoryId];
|
||||
if (!category?.auditRefs) return;
|
||||
|
||||
category.auditRefs.forEach(ref => {
|
||||
const audit = report.audits[ref.id];
|
||||
|
||||
// Include audits with score < 0.5 (50%)
|
||||
if (audit && audit.score !== null && audit.score < 0.5) {
|
||||
issues.push({
|
||||
id: audit.id,
|
||||
category: categoryId,
|
||||
title: audit.title,
|
||||
description: audit.description,
|
||||
score: audit.score,
|
||||
displayValue: audit.displayValue,
|
||||
impact: ref.weight || 0
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// Sort by impact (weight) descending
|
||||
issues.sort((a, b) => b.impact - a.impact);
|
||||
|
||||
return issues.slice(0, 30);
|
||||
}
|
||||
|
||||
// Start the server
|
||||
app.listen(PORT, '0.0.0.0', () => {
|
||||
console.log(`Lighthouse Scanner Service running on port ${PORT}`);
|
||||
});
|
||||