Initial commit: Lighthouse scanner service

commit 90ad47a721

@@ -0,0 +1,27 @@
# Django Core Settings
DEBUG=True
SECRET_KEY=your-secret-key-change-in-production-abc123xyz789
ALLOWED_HOSTS=localhost,127.0.0.1,web

# Database
DATABASE_URL=postgres://analyzer:analyzer_password@db:5432/website_analyzer

# Redis & Celery
REDIS_URL=redis://redis:6379/0
CELERY_BROKER_URL=redis://redis:6379/0
CELERY_RESULT_BACKEND=redis://redis:6379/1

# OWASP ZAP Configuration
ZAP_API_KEY=zap-api-key-change-me
ZAP_HOST=http://zap:8080

# Lighthouse Configuration
LIGHTHOUSE_CHROME_FLAGS=--headless --no-sandbox --disable-gpu

# Scan Settings
MAX_SCAN_TIME_SECONDS=300
SCAN_RATE_LIMIT_MINUTES=5
MAX_CONCURRENT_SCANS=3

# Security
CORS_ALLOWED_ORIGINS=http://localhost:3000,http://localhost:8000

@@ -0,0 +1,84 @@
# Website Analyzer Backend - Dockerfile
# Multi-stage build for efficient image size

FROM python:3.11-slim as builder

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    libpq-dev \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies
COPY requirements.txt .
RUN pip install --user -r requirements.txt

# Install Playwright and its dependencies
RUN pip install --user playwright && \
    python -m playwright install chromium && \
    python -m playwright install-deps chromium

# ==========================================================================
# Production Stage
# ==========================================================================
FROM python:3.11-slim

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PATH="/root/.local/bin:$PATH"

WORKDIR /app

# Install runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    libpq5 \
    curl \
    # Playwright/Chromium dependencies
    libnss3 \
    libnspr4 \
    libatk1.0-0 \
    libatk-bridge2.0-0 \
    libcups2 \
    libdrm2 \
    libdbus-1-3 \
    libxkbcommon0 \
    libxcomposite1 \
    libxdamage1 \
    libxfixes3 \
    libxrandr2 \
    libgbm1 \
    libasound2 \
    libpango-1.0-0 \
    libcairo2 \
    libatspi2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Copy Python packages from builder
COPY --from=builder /root/.local /root/.local
COPY --from=builder /root/.cache/ms-playwright /root/.cache/ms-playwright

# Copy application code
COPY . .

# Create logs directory
RUN mkdir -p logs staticfiles

# Create non-root user for security
RUN useradd -m -u 1000 appuser && \
    chown -R appuser:appuser /app /root/.local /root/.cache
USER appuser

# Expose port
EXPOSE 8000

# Default command
CMD ["gunicorn", "core.wsgi:application", "--bind", "0.0.0.0:8000", "--workers", "4"]

@@ -0,0 +1,5 @@
"""
API app initialization.
"""

default_app_config = 'api.apps.ApiConfig'

@@ -0,0 +1,11 @@
"""
API app configuration.
"""

from django.apps import AppConfig


class ApiConfig(AppConfig):
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'api'
    verbose_name = 'REST API'

@@ -0,0 +1,52 @@
"""
Custom exception handler for DRF.
"""

from rest_framework.views import exception_handler
from rest_framework.response import Response
from rest_framework import status
import logging

logger = logging.getLogger(__name__)


def custom_exception_handler(exc, context):
    """
    Custom exception handler that provides consistent error responses.

    Handles common exceptions and formats them consistently.
    """
    # Call REST framework's default exception handler first
    response = exception_handler(exc, context)

    if response is not None:
        # Customize the response data
        custom_response_data = {
            'error': True,
            'status_code': response.status_code,
        }

        if isinstance(response.data, dict):
            if 'detail' in response.data:
                custom_response_data['message'] = str(response.data['detail'])
            else:
                custom_response_data['errors'] = response.data
        elif isinstance(response.data, list):
            custom_response_data['errors'] = response.data
        else:
            custom_response_data['message'] = str(response.data)

        response.data = custom_response_data
        return response

    # Handle unexpected exceptions
    logger.exception(f"Unhandled exception: {exc}")

    return Response(
        {
            'error': True,
            'status_code': 500,
            'message': 'An unexpected error occurred',
        },
        status=status.HTTP_500_INTERNAL_SERVER_ERROR
    )

@@ -0,0 +1,243 @@
"""
DRF Serializers for the API.

This module defines serializers for converting model instances
to JSON and validating input data.
"""

from rest_framework import serializers
from websites.models import Website, Scan, Issue, Metric, ScanStatus


class IssueSerializer(serializers.ModelSerializer):
    """Serializer for Issue model."""

    severity_display = serializers.CharField(source='get_severity_display', read_only=True)
    category_display = serializers.CharField(source='get_category_display', read_only=True)
    tool_display = serializers.CharField(source='get_tool_display', read_only=True)

    class Meta:
        model = Issue
        fields = [
            'id',
            'category',
            'category_display',
            'severity',
            'severity_display',
            'tool',
            'tool_display',
            'title',
            'description',
            'affected_url',
            'remediation',
            'created_at',
        ]
        read_only_fields = fields


class MetricSerializer(serializers.ModelSerializer):
    """Serializer for Metric model."""

    formatted_value = serializers.CharField(source='get_formatted_value', read_only=True)
    unit_display = serializers.CharField(source='get_unit_display', read_only=True)

    class Meta:
        model = Metric
        fields = [
            'id',
            'name',
            'display_name',
            'value',
            'unit',
            'unit_display',
            'formatted_value',
            'source',
            'score',
        ]
        read_only_fields = fields


class ScanListSerializer(serializers.ModelSerializer):
    """Serializer for Scan list views (minimal data)."""

    status_display = serializers.CharField(source='get_status_display', read_only=True)
    website_url = serializers.CharField(source='website.url', read_only=True)
    issues_count = serializers.SerializerMethodField()

    class Meta:
        model = Scan
        fields = [
            'id',
            'website_url',
            'status',
            'status_display',
            'created_at',
            'completed_at',
            'overall_score',
            'performance_score',
            'security_score',
            'issues_count',
        ]
        read_only_fields = fields

    def get_issues_count(self, obj):
        return obj.issues.count()


class ScanDetailSerializer(serializers.ModelSerializer):
    """Serializer for Scan detail views (full data)."""

    status_display = serializers.CharField(source='get_status_display', read_only=True)
    website_url = serializers.CharField(source='website.url', read_only=True)
    website_domain = serializers.CharField(source='website.domain', read_only=True)
    issues = IssueSerializer(many=True, read_only=True)
    metrics = MetricSerializer(many=True, read_only=True)
    issues_by_category = serializers.SerializerMethodField()
    issues_by_severity = serializers.SerializerMethodField()

    class Meta:
        model = Scan
        fields = [
            'id',
            'website_url',
            'website_domain',
            'status',
            'status_display',
            'created_at',
            'started_at',
            'completed_at',
            'overall_score',
            'performance_score',
            'accessibility_score',
            'seo_score',
            'best_practices_score',
            'security_score',
            'error_message',
            'issues',
            'metrics',
            'issues_by_category',
            'issues_by_severity',
        ]
        read_only_fields = fields

    def get_issues_by_category(self, obj):
        """Group issues by category."""
        from collections import defaultdict
        grouped = defaultdict(list)

        for issue in obj.issues.all():
            grouped[issue.category].append(IssueSerializer(issue).data)

        return dict(grouped)

    def get_issues_by_severity(self, obj):
        """Count issues by severity."""
        from django.db.models import Count

        counts = obj.issues.values('severity').annotate(count=Count('id'))
        return {item['severity']: item['count'] for item in counts}


class ScanCreateSerializer(serializers.Serializer):
    """Serializer for creating new scans."""

    url = serializers.URLField(
        required=True,
        help_text="The URL to scan (must be http or https)"
    )

    def validate_url(self, value):
        """Validate and normalize the URL."""
        from scanner.utils import validate_url

        is_valid, result = validate_url(value)

        if not is_valid:
            raise serializers.ValidationError(result)

        return result  # Return normalized URL

    def create(self, validated_data):
        """Create Website and Scan records."""
        from scanner.tasks import check_rate_limit, check_concurrent_scan_limit, run_scan_task

        url = validated_data['url']

        # Check rate limit
        rate_limit_error = check_rate_limit(url)
        if rate_limit_error:
            raise serializers.ValidationError({'url': rate_limit_error})

        # Check concurrent scan limit
        concurrent_error = check_concurrent_scan_limit()
        if concurrent_error:
            raise serializers.ValidationError({'non_field_errors': concurrent_error})

        # Get or create Website
        website, created = Website.objects.get_or_create(
            url=url,
            defaults={'domain': validated_data.get('domain', '')}
        )

        # Create Scan
        scan = Scan.objects.create(
            website=website,
            status=ScanStatus.PENDING
        )

        # Trigger Celery task
        task = run_scan_task.delay(str(scan.id))

        # Update scan with task ID
        scan.celery_task_id = task.id
        scan.save(update_fields=['celery_task_id'])

        return scan


class WebsiteSerializer(serializers.ModelSerializer):
    """Serializer for Website model."""

    scans_count = serializers.SerializerMethodField()
    latest_scan = serializers.SerializerMethodField()

    class Meta:
        model = Website
        fields = [
            'id',
            'url',
            'domain',
            'created_at',
            'last_scanned_at',
            'scans_count',
            'latest_scan',
        ]
        read_only_fields = fields

    def get_scans_count(self, obj):
        return obj.scans.count()

    def get_latest_scan(self, obj):
        latest = obj.scans.first()
        if latest:
            return ScanListSerializer(latest).data
        return None


class WebsiteDetailSerializer(WebsiteSerializer):
    """Detailed Website serializer with scan list."""

    scans = ScanListSerializer(many=True, read_only=True)

    class Meta(WebsiteSerializer.Meta):
        fields = WebsiteSerializer.Meta.fields + ['scans']


class HealthCheckSerializer(serializers.Serializer):
    """Serializer for health check response."""

    status = serializers.CharField()
    database = serializers.CharField()
    redis = serializers.CharField()
    celery = serializers.CharField()
    timestamp = serializers.DateTimeField()

@@ -0,0 +1,18 @@
"""
URL routing for the API.
"""

from django.urls import path, include
from rest_framework.routers import DefaultRouter
from . import views

router = DefaultRouter()
router.register(r'scans', views.ScanViewSet, basename='scan')
router.register(r'websites', views.WebsiteViewSet, basename='website')
router.register(r'issues', views.IssueViewSet, basename='issue')

urlpatterns = [
    path('', views.api_root, name='api-root'),
    path('health/', views.health_check, name='health-check'),
    path('', include(router.urls)),
]

@@ -0,0 +1,336 @@
"""
DRF Views for the API.

This module defines API views for scans, websites, and issues.
"""

import logging
from django.db import connection
from django.utils import timezone
from django.core.cache import cache
from rest_framework import viewsets, status, generics
from rest_framework.decorators import api_view, action
from rest_framework.response import Response
from rest_framework.pagination import PageNumberPagination
from rest_framework.throttling import AnonRateThrottle

from websites.models import Website, Scan, Issue, Metric
from .serializers import (
    WebsiteSerializer,
    WebsiteDetailSerializer,
    ScanListSerializer,
    ScanDetailSerializer,
    ScanCreateSerializer,
    IssueSerializer,
    MetricSerializer,
    HealthCheckSerializer,
)

logger = logging.getLogger(__name__)


class ScanRateThrottle(AnonRateThrottle):
    """Custom throttle for scan creation."""
    rate = '10/hour'


class StandardResultsPagination(PageNumberPagination):
    """Standard pagination for list views."""
    page_size = 20
    page_size_query_param = 'page_size'
    max_page_size = 100


class ScanViewSet(viewsets.ModelViewSet):
    """
    ViewSet for Scan operations.

    Endpoints:
    - POST /api/scans/ - Create a new scan
    - GET /api/scans/ - List all scans
    - GET /api/scans/{id}/ - Get scan details
    - DELETE /api/scans/{id}/ - Delete a scan
    """

    queryset = Scan.objects.select_related('website').prefetch_related('issues', 'metrics')
    pagination_class = StandardResultsPagination

    def get_serializer_class(self):
        if self.action == 'list':
            return ScanListSerializer
        elif self.action == 'create':
            return ScanCreateSerializer
        return ScanDetailSerializer

    def get_throttles(self):
        if self.action == 'create':
            return [ScanRateThrottle()]
        return super().get_throttles()

    def create(self, request, *args, **kwargs):
        """
        Create a new scan.

        Request body:
        ```json
        {"url": "https://example.com"}
        ```

        Returns the created scan with pending status.
        The scan will be processed asynchronously.
        """
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        try:
            scan = serializer.save()

            # Return the created scan details
            response_serializer = ScanDetailSerializer(scan)
            return Response(
                response_serializer.data,
                status=status.HTTP_201_CREATED
            )
        except Exception as e:
            logger.exception("Error creating scan")
            return Response(
                {'error': str(e)},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR
            )

    @action(detail=True, methods=['get'])
    def issues(self, request, pk=None):
        """Get all issues for a scan."""
        scan = self.get_object()
        issues = scan.issues.all()

        # Optional filtering
        category = request.query_params.get('category')
        severity = request.query_params.get('severity')
        tool = request.query_params.get('tool')

        if category:
            issues = issues.filter(category=category)
        if severity:
            issues = issues.filter(severity=severity)
        if tool:
            issues = issues.filter(tool=tool)

        serializer = IssueSerializer(issues, many=True)
        return Response(serializer.data)

    @action(detail=True, methods=['get'])
    def metrics(self, request, pk=None):
        """Get all metrics for a scan."""
        scan = self.get_object()
        metrics = scan.metrics.all()

        # Optional filtering by source
        source = request.query_params.get('source')
        if source:
            metrics = metrics.filter(source=source)

        serializer = MetricSerializer(metrics, many=True)
        return Response(serializer.data)

    @action(detail=True, methods=['get'])
    def status(self, request, pk=None):
        """Get just the status of a scan (for polling)."""
        scan = self.get_object()
        return Response({
            'id': str(scan.id),
            'status': scan.status,
            'status_display': scan.get_status_display(),
            'progress': self._get_scan_progress(scan),
        })

    def _get_scan_progress(self, scan):
        """Estimate scan progress based on status and results."""
        if scan.status == 'done':
            return 100
        elif scan.status == 'failed':
            return 0
        elif scan.status == 'running':
            # Estimate based on what data we have
            progress = 10  # Started
            if scan.raw_headers_data:
                progress += 20
            if scan.raw_playwright_data:
                progress += 25
            if scan.raw_lighthouse_data:
                progress += 30
            if scan.raw_zap_data:
                progress += 15
            return min(progress, 95)
        return 0


class WebsiteViewSet(viewsets.ReadOnlyModelViewSet):
    """
    ViewSet for Website operations.

    Endpoints:
    - GET /api/websites/ - List all websites
    - GET /api/websites/{id}/ - Get website details
    - GET /api/websites/{id}/scans/ - Get scans for a website
    """

    queryset = Website.objects.prefetch_related('scans')
    pagination_class = StandardResultsPagination

    def get_serializer_class(self):
        if self.action == 'retrieve':
            return WebsiteDetailSerializer
        return WebsiteSerializer

    @action(detail=True, methods=['get'])
    def scans(self, request, pk=None):
        """Get all scans for a website."""
        website = self.get_object()
        scans = website.scans.all()

        # Apply pagination
        page = self.paginate_queryset(scans)
        if page is not None:
            serializer = ScanListSerializer(page, many=True)
            return self.get_paginated_response(serializer.data)

        serializer = ScanListSerializer(scans, many=True)
        return Response(serializer.data)


class IssueViewSet(viewsets.ReadOnlyModelViewSet):
    """
    ViewSet for Issue operations.

    Endpoints:
    - GET /api/issues/ - List all issues (with filtering)
    - GET /api/issues/{id}/ - Get issue details
    """

    queryset = Issue.objects.select_related('scan', 'scan__website')
    serializer_class = IssueSerializer
    pagination_class = StandardResultsPagination

    def get_queryset(self):
        queryset = super().get_queryset()

        # Filter by scan
        scan_id = self.request.query_params.get('scan')
        if scan_id:
            queryset = queryset.filter(scan_id=scan_id)

        # Filter by category
        category = self.request.query_params.get('category')
        if category:
            queryset = queryset.filter(category=category)

        # Filter by severity
        severity = self.request.query_params.get('severity')
        if severity:
            queryset = queryset.filter(severity=severity)

        # Filter by tool
        tool = self.request.query_params.get('tool')
        if tool:
            queryset = queryset.filter(tool=tool)

        return queryset


@api_view(['GET'])
def health_check(request):
    """
    Health check endpoint.

    Checks:
    - Database connectivity
    - Redis connectivity
    - Celery worker status

    Returns health status of all components.
    """
    health = {
        'status': 'healthy',
        'database': 'unknown',
        'redis': 'unknown',
        'celery': 'unknown',
        'timestamp': timezone.now(),
    }

    # Check database
    try:
        connection.ensure_connection()
        health['database'] = 'healthy'
    except Exception as e:
        health['database'] = f'unhealthy: {e}'
        health['status'] = 'unhealthy'

    # Check Redis
    try:
        cache.set('health_check', 'ok', 10)
        if cache.get('health_check') == 'ok':
            health['redis'] = 'healthy'
        else:
            health['redis'] = 'unhealthy: cache not working'
            health['status'] = 'degraded'
    except Exception as e:
        health['redis'] = f'unhealthy: {e}'
        health['status'] = 'degraded'

    # Check Celery (basic check)
    try:
        from core.celery import app as celery_app
        inspect = celery_app.control.inspect()

        # Try to get active workers
        active = inspect.active()
        if active:
            health['celery'] = f'healthy ({len(active)} workers)'
        else:
            health['celery'] = 'degraded: no active workers'
            health['status'] = 'degraded'
    except Exception as e:
        health['celery'] = f'unknown: {e}'

    status_code = 200 if health['status'] == 'healthy' else 503

    serializer = HealthCheckSerializer(health)
    return Response(serializer.data, status=status_code)


@api_view(['GET'])
def api_root(request):
    """
    API root endpoint.

    Returns available endpoints and basic API information.
    """
    return Response({
        'message': 'Website Analyzer API',
        'version': '1.0.0',
        'endpoints': {
            'scans': '/api/scans/',
            'websites': '/api/websites/',
            'issues': '/api/issues/',
            'health': '/api/health/',
        },
        'documentation': {
            'create_scan': {
                'method': 'POST',
                'url': '/api/scans/',
                'body': {'url': 'https://example.com'},
                'description': 'Create a new website scan'
            },
            'get_scan': {
                'method': 'GET',
                'url': '/api/scans/{id}/',
                'description': 'Get scan results and details'
            },
            'list_scans': {
                'method': 'GET',
                'url': '/api/scans/',
                'description': 'List all scans with pagination'
            },
        }
    })

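Taken together, these views define the scan lifecycle a client sees: POST a URL, poll the status action, then fetch the full result. Below is a minimal client-side sketch of that flow, using the httpx dependency already listed in requirements.txt; the base URL, timeout, and polling interval are illustrative assumptions, not part of the commit.

# Hypothetical client sketch: create a scan and poll it until it finishes.
import time
import httpx

BASE = "http://localhost:8000/api"  # assumed host/port, adjust to your deployment

with httpx.Client(timeout=30) as client:
    # POST /api/scans/ with the target URL; the scan comes back in 'pending' state
    created = client.post(f"{BASE}/scans/", json={"url": "https://example.com"})
    created.raise_for_status()
    scan_id = created.json()["id"]

    # Poll the lightweight status action until the scan reaches a terminal state
    while True:
        info = client.get(f"{BASE}/scans/{scan_id}/status/").json()
        print(info["status_display"], f"{info['progress']}%")
        if info["status"] in ("done", "failed"):
            break
        time.sleep(5)  # illustrative polling interval

    # Fetch the full detail serialization, including issues and metrics
    result = client.get(f"{BASE}/scans/{scan_id}/").json()
    print(len(result["issues"]), "issues found")
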
@@ -0,0 +1,9 @@
"""
Core module initialization.

This module loads the Celery app so that shared_task will use this app.
"""

from .celery import app as celery_app

__all__ = ('celery_app',)

@@ -0,0 +1,11 @@
"""
ASGI config for Website Analyzer project.
"""

import os

from django.core.asgi import get_asgi_application

os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')

application = get_asgi_application()

@@ -0,0 +1,28 @@
"""
Celery configuration for Website Analyzer.

This module configures Celery for asynchronous task processing,
specifically for running website scans in the background.
"""

import os

from celery import Celery

# Set the default Django settings module for the 'celery' program.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')

app = Celery('website_analyzer')

# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
app.config_from_object('django.conf:settings', namespace='CELERY')

# Load task modules from all registered Django apps.
app.autodiscover_tasks()


@app.task(bind=True, ignore_result=True)
def debug_task(self):
    """Debug task for testing Celery connectivity."""
    print(f'Request: {self.request!r}')

@@ -0,0 +1,300 @@
"""
Django settings for Website Analyzer project.

This module contains all configuration settings for the Django application,
including database, caching, security, and third-party integrations.
"""

import os
from pathlib import Path

from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent

# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = os.getenv('SECRET_KEY', 'django-insecure-change-me-in-production')

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = os.getenv('DEBUG', 'False').lower() in ('true', '1', 'yes')

ALLOWED_HOSTS = os.getenv('ALLOWED_HOSTS', 'localhost,127.0.0.1').split(',')


# Application definition

INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',

    # Third-party apps
    'rest_framework',
    'corsheaders',

    # Local apps
    'websites',
    'scanner',
    'api',
]

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'whitenoise.middleware.WhiteNoiseMiddleware',
    'corsheaders.middleware.CorsMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'core.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [BASE_DIR / 'templates'],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'core.wsgi.application'


# Database
# Parse DATABASE_URL or use default PostgreSQL settings

DATABASE_URL = os.getenv('DATABASE_URL', 'postgres://analyzer:analyzer_password@localhost:5432/website_analyzer')

# Parse the DATABASE_URL
import re
db_pattern = r'postgres://(?P<user>[^:]+):(?P<password>[^@]+)@(?P<host>[^:]+):(?P<port>\d+)/(?P<name>.+)'
db_match = re.match(db_pattern, DATABASE_URL)

if db_match:
    DATABASES = {
        'default': {
            'ENGINE': 'django.db.backends.postgresql',
            'NAME': db_match.group('name'),
            'USER': db_match.group('user'),
            'PASSWORD': db_match.group('password'),
            'HOST': db_match.group('host'),
            'PORT': db_match.group('port'),
        }
    }
else:
    # Fallback for development
    DATABASES = {
        'default': {
            'ENGINE': 'django.db.backends.sqlite3',
            'NAME': BASE_DIR / 'db.sqlite3',
        }
    }


# Password validation
AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]


# Internationalization
LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'UTC'
USE_I18N = True
USE_TZ = True


# Static files (CSS, JavaScript, Images)
STATIC_URL = 'static/'
STATIC_ROOT = BASE_DIR / 'staticfiles'
STATICFILES_DIRS = [BASE_DIR / 'static']
STATICFILES_STORAGE = 'whitenoise.storage.CompressedManifestStaticFilesStorage'

# Default primary key field type
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'


# =============================================================================
# REST Framework Configuration
# =============================================================================
REST_FRAMEWORK = {
    'DEFAULT_RENDERER_CLASSES': [
        'rest_framework.renderers.JSONRenderer',
        'rest_framework.renderers.BrowsableAPIRenderer',
    ],
    'DEFAULT_PAGINATION_CLASS': 'rest_framework.pagination.PageNumberPagination',
    'PAGE_SIZE': 20,
    'DEFAULT_THROTTLE_CLASSES': [
        'rest_framework.throttling.AnonRateThrottle',
        'rest_framework.throttling.UserRateThrottle'
    ],
    'DEFAULT_THROTTLE_RATES': {
        'anon': '100/hour',
        'user': '1000/hour',
        'scan': '10/hour',  # Specific rate for scan creation
    },
    'EXCEPTION_HANDLER': 'api.exceptions.custom_exception_handler',
}


# =============================================================================
# CORS Configuration
# =============================================================================
CORS_ALLOWED_ORIGINS = os.getenv(
    'CORS_ALLOWED_ORIGINS',
    'http://localhost:3000,http://localhost:8000'
).split(',')
CORS_ALLOW_CREDENTIALS = True


# =============================================================================
# Celery Configuration
# =============================================================================
CELERY_BROKER_URL = os.getenv('CELERY_BROKER_URL', 'redis://localhost:6379/0')
CELERY_RESULT_BACKEND = os.getenv('CELERY_RESULT_BACKEND', 'redis://localhost:6379/1')
CELERY_ACCEPT_CONTENT = ['json']
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
CELERY_TIMEZONE = TIME_ZONE
CELERY_TASK_TRACK_STARTED = True
CELERY_TASK_TIME_LIMIT = int(os.getenv('MAX_SCAN_TIME_SECONDS', '300'))
CELERY_TASK_SOFT_TIME_LIMIT = CELERY_TASK_TIME_LIMIT - 30


# =============================================================================
# Redis Cache Configuration
# =============================================================================
REDIS_URL = os.getenv('REDIS_URL', 'redis://localhost:6379/0')
CACHES = {
    'default': {
        'BACKEND': 'django.core.cache.backends.redis.RedisCache',
        'LOCATION': REDIS_URL,
    }
}


# =============================================================================
# Scanner Configuration
# =============================================================================
SCANNER_CONFIG = {
    # OWASP ZAP settings
    'ZAP_API_KEY': os.getenv('ZAP_API_KEY', ''),
    'ZAP_HOST': os.getenv('ZAP_HOST', 'http://localhost:8080'),
    'ZAP_TIMEOUT': 120,

    # Lighthouse settings
    'LIGHTHOUSE_CHROME_FLAGS': os.getenv(
        'LIGHTHOUSE_CHROME_FLAGS',
        '--headless --no-sandbox --disable-gpu'
    ),
    'LIGHTHOUSE_TIMEOUT': 60,

    # Playwright settings
    'PLAYWRIGHT_TIMEOUT': 30000,  # milliseconds
    'PLAYWRIGHT_VIEWPORT': {'width': 1920, 'height': 1080},

    # General scan settings
    'MAX_SCAN_TIME_SECONDS': int(os.getenv('MAX_SCAN_TIME_SECONDS', '300')),
    'SCAN_RATE_LIMIT_MINUTES': int(os.getenv('SCAN_RATE_LIMIT_MINUTES', '5')),
    'MAX_CONCURRENT_SCANS': int(os.getenv('MAX_CONCURRENT_SCANS', '3')),

    # Safety settings - blocked IP ranges (RFC1918 private ranges + localhost)
    'BLOCKED_IP_RANGES': [
        '10.0.0.0/8',
        '172.16.0.0/12',
        '192.168.0.0/16',
        '127.0.0.0/8',
        '169.254.0.0/16',  # Link-local
        '::1/128',  # IPv6 localhost
        'fc00::/7',  # IPv6 private
        'fe80::/10',  # IPv6 link-local
    ],
    'BLOCKED_HOSTS': ['localhost', 'localhost.localdomain'],

    # Large file thresholds
    'LARGE_IMAGE_THRESHOLD_BYTES': 1024 * 1024,  # 1 MB
    'LARGE_JS_BUNDLE_THRESHOLD_BYTES': 500 * 1024,  # 500 KB
}


# =============================================================================
# Logging Configuration
# =============================================================================
LOGGING = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': {
        'verbose': {
            'format': '{levelname} {asctime} {module} {process:d} {thread:d} {message}',
            'style': '{',
        },
        'simple': {
            'format': '{levelname} {asctime} {module} {message}',
            'style': '{',
        },
    },
    'handlers': {
        'console': {
            'class': 'logging.StreamHandler',
            'formatter': 'simple',
        },
        'file': {
            'class': 'logging.FileHandler',
            'filename': BASE_DIR / 'logs' / 'django.log',
            'formatter': 'verbose',
        },
    },
    'root': {
        'handlers': ['console'],
        'level': 'INFO',
    },
    'loggers': {
        'django': {
            'handlers': ['console'],
            'level': os.getenv('DJANGO_LOG_LEVEL', 'INFO'),
            'propagate': False,
        },
        'scanner': {
            'handlers': ['console'],
            'level': 'DEBUG' if DEBUG else 'INFO',
            'propagate': False,
        },
        'celery': {
            'handlers': ['console'],
            'level': 'INFO',
            'propagate': False,
        },
    },
}

# Create logs directory if it doesn't exist
(BASE_DIR / 'logs').mkdir(exist_ok=True)

@@ -0,0 +1,20 @@
"""
URL configuration for Website Analyzer project.
"""

from django.contrib import admin
from django.urls import path, include
from django.views.generic import TemplateView


urlpatterns = [
    # Admin
    path('admin/', admin.site.urls),

    # API endpoints
    path('api/', include('api.urls')),

    # Frontend views
    path('', TemplateView.as_view(template_name='index.html'), name='home'),
    path('scan/<uuid:scan_id>/', TemplateView.as_view(template_name='scan_detail.html'), name='scan_detail'),
]

@@ -0,0 +1,11 @@
"""
WSGI config for Website Analyzer project.
"""

import os

from django.core.wsgi import get_wsgi_application

os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')

application = get_wsgi_application()

@@ -0,0 +1,22 @@
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys


def main():
    """Run administrative tasks."""
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        raise ImportError(
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        ) from exc
    execute_from_command_line(sys.argv)


if __name__ == '__main__':
    main()

@ -0,0 +1,91 @@
|
||||||
|
[build-system]
|
||||||
|
requires = ["setuptools>=61.0"]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "website-analyzer"
|
||||||
|
version = "1.0.0"
|
||||||
|
description = "A Django-based web application for analyzing website performance, security, and best practices"
|
||||||
|
readme = "README.md"
|
||||||
|
license = {text = "MIT"}
|
||||||
|
requires-python = ">=3.11"
|
||||||
|
authors = [
|
||||||
|
{name = "Website Analyzer Team"}
|
||||||
|
]
|
||||||
|
classifiers = [
|
||||||
|
"Development Status :: 4 - Beta",
|
||||||
|
"Framework :: Django :: 5.0",
|
||||||
|
"Intended Audience :: Developers",
|
||||||
|
"License :: OSI Approved :: MIT License",
|
||||||
|
"Operating System :: OS Independent",
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
"Programming Language :: Python :: 3.11",
|
||||||
|
"Programming Language :: Python :: 3.12",
|
||||||
|
]
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
"Django>=5.0,<6.0",
|
||||||
|
"djangorestframework>=3.14.0",
|
||||||
|
"django-cors-headers>=4.3.0",
|
||||||
|
"psycopg2-binary>=2.9.9",
|
||||||
|
"celery[redis]>=5.3.0",
|
||||||
|
"redis>=5.0.0",
|
||||||
|
"httpx>=0.26.0",
|
||||||
|
"playwright>=1.40.0",
|
||||||
|
"python-dotenv>=1.0.0",
|
||||||
|
"gunicorn>=21.2.0",
|
||||||
|
"whitenoise>=6.6.0",
|
||||||
|
"validators>=0.22.0",
|
||||||
|
"ipaddress>=1.0.23",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
dev = [
|
||||||
|
"pytest>=7.4.0",
|
||||||
|
"pytest-django>=4.7.0",
|
||||||
|
"pytest-asyncio>=0.23.0",
|
||||||
|
"pytest-cov>=4.1.0",
|
||||||
|
"black>=23.12.0",
|
||||||
|
"isort>=5.13.0",
|
||||||
|
"flake8>=7.0.0",
|
||||||
|
"mypy>=1.8.0",
|
||||||
|
"django-stubs>=4.2.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.black]
|
||||||
|
line-length = 100
|
||||||
|
target-version = ['py311']
|
||||||
|
include = '\.pyi?$'
|
||||||
|
exclude = '''
|
||||||
|
/(
|
||||||
|
\.git
|
||||||
|
| \.hg
|
||||||
|
| \.mypy_cache
|
||||||
|
| \.tox
|
||||||
|
| \.venv
|
||||||
|
| _build
|
||||||
|
| buck-out
|
||||||
|
| build
|
||||||
|
| dist
|
||||||
|
| migrations
|
||||||
|
)/
|
||||||
|
'''
|
||||||
|
|
||||||
|
[tool.isort]
|
||||||
|
profile = "black"
|
||||||
|
line_length = 100
|
||||||
|
skip = ["migrations", ".venv"]
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
DJANGO_SETTINGS_MODULE = "core.settings"
|
||||||
|
python_files = ["test_*.py", "*_test.py"]
|
||||||
|
addopts = "-v --tb=short"
|
||||||
|
|
||||||
|
[tool.mypy]
|
||||||
|
python_version = "3.11"
|
||||||
|
plugins = ["mypy_django_plugin.main"]
|
||||||
|
ignore_missing_imports = true
|
||||||
|
strict = false
|
||||||
|
|
||||||
|
[tool.django-stubs]
|
||||||
|
django_settings_module = "core.settings"
|
||||||
|
|
@@ -0,0 +1,36 @@
# Django & REST Framework
Django>=5.0,<6.0
djangorestframework>=3.14.0
django-cors-headers>=4.3.0

# Database
psycopg2-binary>=2.9.9

# Async Task Queue
celery[redis]>=5.3.0
redis>=5.0.0

# HTTP Client
httpx>=0.26.0

# Browser Automation
playwright>=1.40.0

# Environment & Config
python-dotenv>=1.0.0

# Production Server
gunicorn>=21.2.0
whitenoise>=6.6.0

# Validation & Utilities
validators>=0.22.0

# Development & Testing
pytest>=7.4.0
pytest-django>=4.7.0
pytest-asyncio>=0.23.0
pytest-cov>=4.1.0
black>=23.12.0
isort>=5.13.0
flake8>=7.0.0

@@ -0,0 +1,5 @@
"""
Scanner app initialization.
"""

default_app_config = 'scanner.apps.ScannerConfig'

@@ -0,0 +1,11 @@
"""
Scanner app configuration.
"""

from django.apps import AppConfig


class ScannerConfig(AppConfig):
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'scanner'
    verbose_name = 'Scanner Tools'

@@ -0,0 +1,25 @@
"""
Scanner modules initialization.

This package contains the various scanner implementations
that analyze websites for performance, security, and best practices.
"""

from .base import BaseScanner, ScannerResult
from .lighthouse import LighthouseScanner
from .playwright_scanner import PlaywrightScanner
from .zap import ZAPScanner
from .headers import HeaderScanner
from .tls import TLSScanner
from .runner import ScanRunner

__all__ = [
    'BaseScanner',
    'ScannerResult',
    'LighthouseScanner',
    'PlaywrightScanner',
    'ZAPScanner',
    'HeaderScanner',
    'TLSScanner',
    'ScanRunner',
]

@@ -0,0 +1,161 @@
"""
Base scanner interface and result structures.

All scanner implementations should inherit from BaseScanner
and return ScannerResult objects.
"""

import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
from enum import Enum

logger = logging.getLogger(__name__)


class ScannerStatus(str, Enum):
    """Status of a scanner execution."""
    SUCCESS = "success"
    PARTIAL = "partial"
    FAILED = "failed"
    SKIPPED = "skipped"


@dataclass
class IssueData:
    """
    Represents a single issue found by a scanner.

    Attributes:
        category: Issue category (security, performance, etc.)
        severity: Issue severity (critical, high, medium, low, info)
        title: Brief title of the issue
        description: Detailed description
        tool: The scanner that found this issue
        affected_url: Specific URL affected (optional)
        remediation: Suggested fix (optional)
        raw_data: Original scanner data (optional)
    """
    category: str
    severity: str
    title: str
    description: str
    tool: str
    affected_url: Optional[str] = None
    remediation: Optional[str] = None
    raw_data: Optional[Dict[str, Any]] = None


@dataclass
class MetricData:
    """
    Represents a single metric measured by a scanner.

    Attributes:
        name: Internal name (e.g., 'first_contentful_paint_ms')
        display_name: Human-readable name
        value: Numeric value
        unit: Unit of measurement
        source: The scanner that measured this
        score: Normalized score (0-1) if available
    """
    name: str
    display_name: str
    value: float
    unit: str
    source: str
    score: Optional[float] = None


@dataclass
class ScannerResult:
    """
    Result of a scanner execution.

    Attributes:
        scanner_name: Name of the scanner
        status: Execution status
        issues: List of issues found
        metrics: List of metrics measured
        scores: Dictionary of category scores
        raw_data: Original scanner output
        error_message: Error details if failed
    """
    scanner_name: str
    status: ScannerStatus
    issues: List[IssueData] = field(default_factory=list)
    metrics: List[MetricData] = field(default_factory=list)
    scores: Dict[str, int] = field(default_factory=dict)
    raw_data: Optional[Dict[str, Any]] = None
    error_message: Optional[str] = None


class BaseScanner(ABC):
    """
    Abstract base class for all scanners.

    Each scanner implementation must implement the `run` method
    which performs the actual scan and returns a ScannerResult.
    """

    name: str = "base"

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Initialize the scanner with optional configuration.

        Args:
            config: Scanner-specific configuration dictionary
        """
        self.config = config or {}
        self.logger = logging.getLogger(f"scanner.{self.name}")

    @abstractmethod
    def run(self, url: str) -> ScannerResult:
        """
        Run the scanner against the given URL.

        Args:
            url: The URL to scan

        Returns:
            ScannerResult with findings, metrics, and status
        """
        pass

    def is_available(self) -> bool:
        """
        Check if the scanner service/tool is available.

        Returns:
            True if the scanner can be used, False otherwise
        """
        return True

    def _create_error_result(self, error: Exception) -> ScannerResult:
        """
        Create a failed result from an exception.

        Args:
            error: The exception that occurred

        Returns:
            ScannerResult with failed status
        """
        self.logger.error(f"Scanner {self.name} failed: {error}")
        return ScannerResult(
            scanner_name=self.name,
            status=ScannerStatus.FAILED,
            error_message=str(error),
            issues=[
                IssueData(
                    category="scanner",
                    severity="info",
                    title=f"{self.name.title()} scan failed",
                    description=f"The {self.name} scanner encountered an error: {error}",
                    tool=self.name,
                    remediation="Check scanner service configuration and availability."
                )
            ]
        )

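To illustrate how this interface is meant to be extended, here is a hypothetical scanner subclass (not one of the scanners added in this commit): it only sets name, implements run, and reuses the shared _create_error_result helper. The import path and the response-time metric are illustrative assumptions.

# Hypothetical example of a BaseScanner subclass, for illustration only.
import time

import httpx

from scanner.scanners.base import (  # assumed import path for this package
    BaseScanner, MetricData, ScannerResult, ScannerStatus,
)


class ResponseTimeScanner(BaseScanner):
    """Toy scanner that records how long a GET request to the URL takes."""

    name = "response_time"

    def run(self, url: str) -> ScannerResult:
        try:
            start = time.monotonic()
            httpx.get(url, timeout=self.config.get('timeout', 30))
            elapsed_ms = (time.monotonic() - start) * 1000

            return ScannerResult(
                scanner_name=self.name,
                status=ScannerStatus.SUCCESS,
                metrics=[MetricData(
                    name='response_time_ms',
                    display_name='Response Time',
                    value=elapsed_ms,
                    unit='ms',
                    source=self.name,
                )],
            )
        except Exception as e:
            # Failures are reported uniformly via the base-class helper
            return self._create_error_result(e)
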
@ -0,0 +1,405 @@
|
||||||
|
"""
|
||||||
|
HTTP Header Security Scanner.
|
||||||
|
|
||||||
|
This module analyzes HTTP response headers for security
|
||||||
|
best practices and common misconfigurations.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from .base import (
|
||||||
|
BaseScanner,
|
||||||
|
ScannerResult,
|
||||||
|
ScannerStatus,
|
||||||
|
IssueData,
|
||||||
|
MetricData,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Security header definitions with expected values and severity
|
||||||
|
SECURITY_HEADERS = {
|
||||||
|
'Strict-Transport-Security': {
|
||||||
|
'severity': 'high',
|
||||||
|
'description': 'HTTP Strict Transport Security (HSTS) forces browsers to use HTTPS.',
|
||||||
|
'remediation': (
|
||||||
|
'Add the header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload'
|
||||||
|
),
|
||||||
|
'check_value': lambda v: 'max-age' in v.lower() and int(
|
||||||
|
v.lower().split('max-age=')[1].split(';')[0].strip()
|
||||||
|
) >= 31536000 if 'max-age=' in v.lower() else False,
|
||||||
|
},
|
||||||
|
'Content-Security-Policy': {
|
||||||
|
'severity': 'high',
|
||||||
|
'description': 'Content Security Policy (CSP) helps prevent XSS and data injection attacks.',
|
||||||
|
'remediation': (
|
||||||
|
"Implement a Content-Security-Policy header that restricts sources for scripts, "
|
||||||
|
"styles, and other resources. Start with a report-only policy to identify issues."
|
||||||
|
),
|
||||||
|
'check_value': lambda v: "default-src" in v.lower() or "script-src" in v.lower(),
|
||||||
|
},
|
||||||
|
'X-Content-Type-Options': {
|
||||||
|
'severity': 'medium',
|
||||||
|
'description': 'Prevents browsers from MIME-sniffing responses.',
|
||||||
|
'remediation': 'Add the header: X-Content-Type-Options: nosniff',
|
||||||
|
'check_value': lambda v: v.lower() == 'nosniff',
|
||||||
|
},
|
||||||
|
'X-Frame-Options': {
|
||||||
|
'severity': 'medium',
|
||||||
|
'description': 'Protects against clickjacking by controlling page framing.',
|
||||||
|
'remediation': 'Add the header: X-Frame-Options: DENY or SAMEORIGIN',
|
||||||
|
'check_value': lambda v: v.upper() in ['DENY', 'SAMEORIGIN'],
|
||||||
|
},
|
||||||
|
'Referrer-Policy': {
|
||||||
|
'severity': 'low',
|
||||||
|
'description': 'Controls how much referrer information is sent with requests.',
|
||||||
|
'remediation': (
|
||||||
|
'Add the header: Referrer-Policy: strict-origin-when-cross-origin '
|
||||||
|
'or no-referrer-when-downgrade'
|
||||||
|
),
|
||||||
|
'check_value': lambda v: v.lower() in [
|
||||||
|
'no-referrer', 'no-referrer-when-downgrade',
|
||||||
|
'strict-origin', 'strict-origin-when-cross-origin',
|
||||||
|
'same-origin', 'origin', 'origin-when-cross-origin'
|
||||||
|
],
|
||||||
|
},
|
||||||
|
'Permissions-Policy': {
|
||||||
|
'severity': 'low',
|
||||||
|
'description': 'Controls which browser features can be used.',
|
||||||
|
'remediation': (
|
||||||
|
'Add a Permissions-Policy header to restrict access to sensitive browser APIs '
|
||||||
|
'like geolocation, camera, and microphone.'
|
||||||
|
),
|
||||||
|
'check_value': lambda v: len(v) > 0,
|
||||||
|
},
|
||||||
|
'X-XSS-Protection': {
|
||||||
|
'severity': 'info',
|
||||||
|
'description': 'Legacy XSS filter (deprecated in modern browsers, CSP is preferred).',
|
||||||
|
'remediation': 'While deprecated, you can add: X-XSS-Protection: 1; mode=block',
|
||||||
|
'check_value': lambda v: '1' in v,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
# CORS security checks
|
||||||
|
CORS_CHECKS = {
|
||||||
|
'permissive_origin': {
|
||||||
|
'severity': 'high',
|
||||||
|
'title': 'Overly permissive CORS (Access-Control-Allow-Origin: *)',
|
||||||
|
'description': (
|
||||||
|
'The server allows requests from any origin. This can expose sensitive data '
|
||||||
|
'to malicious websites if combined with credentials.'
|
||||||
|
),
|
||||||
|
'remediation': (
|
||||||
|
'Restrict Access-Control-Allow-Origin to specific trusted domains instead of using *. '
|
||||||
|
'Never use * with Access-Control-Allow-Credentials: true.'
|
||||||
|
),
|
||||||
|
},
|
||||||
|
'credentials_with_wildcard': {
|
||||||
|
'severity': 'critical',
|
||||||
|
'title': 'CORS allows credentials with wildcard origin',
|
||||||
|
'description': (
|
||||||
|
'The server has Access-Control-Allow-Credentials: true with Access-Control-Allow-Origin: *. '
|
||||||
|
'This is a severe misconfiguration that can allow credential theft.'
|
||||||
|
),
|
||||||
|
'remediation': (
|
||||||
|
'Never combine Access-Control-Allow-Credentials: true with a wildcard origin. '
|
||||||
|
'Implement a whitelist of allowed origins.'
|
||||||
|
),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class HeaderScanner(BaseScanner):
|
||||||
|
"""
|
||||||
|
Scanner for HTTP security headers.
|
||||||
|
|
||||||
|
Checks for:
|
||||||
|
- Missing security headers
|
||||||
|
- Improperly configured headers
|
||||||
|
- CORS misconfigurations
|
||||||
|
- Cookie security flags
|
||||||
|
"""
|
||||||
|
|
||||||
|
name = "header_check"
|
||||||
|
|
||||||
|
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||||
|
super().__init__(config)
|
||||||
|
self.timeout = self.config.get('timeout', 30)
|
||||||
|
|
||||||
|
def run(self, url: str) -> ScannerResult:
|
||||||
|
"""
|
||||||
|
Run header security analysis on the URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to analyze
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ScannerResult with header findings
|
||||||
|
"""
|
||||||
|
self.logger.info(f"Starting header scan for {url}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Make both GET and HEAD requests
|
||||||
|
headers_data = self._fetch_headers(url)
|
||||||
|
|
||||||
|
issues = []
|
||||||
|
metrics = []
|
||||||
|
|
||||||
|
# Check security headers
|
||||||
|
header_issues, header_score = self._check_security_headers(
|
||||||
|
headers_data['headers']
|
||||||
|
)
|
||||||
|
issues.extend(header_issues)
|
||||||
|
|
||||||
|
# Check CORS configuration
|
||||||
|
cors_issues = self._check_cors(headers_data['headers'], url)
|
||||||
|
issues.extend(cors_issues)
|
||||||
|
|
||||||
|
# Check cookies
|
||||||
|
cookie_issues = self._check_cookies(headers_data, url)
|
||||||
|
issues.extend(cookie_issues)
|
||||||
|
|
||||||
|
# Create metrics
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='security_headers_score',
|
||||||
|
display_name='Security Headers Score',
|
||||||
|
value=float(header_score),
|
||||||
|
unit='percent',
|
||||||
|
source='header_check'
|
||||||
|
))
|
||||||
|
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='headers_missing_count',
|
||||||
|
display_name='Missing Security Headers',
|
||||||
|
value=float(len([i for i in header_issues if 'missing' in i.title.lower()])),
|
||||||
|
unit='count',
|
||||||
|
source='header_check'
|
||||||
|
))
|
||||||
|
|
||||||
|
self.logger.info(
|
||||||
|
f"Header scan complete: {len(issues)} issues, score: {header_score}"
|
||||||
|
)
|
||||||
|
|
||||||
|
return ScannerResult(
|
||||||
|
scanner_name=self.name,
|
||||||
|
status=ScannerStatus.SUCCESS,
|
||||||
|
issues=issues,
|
||||||
|
metrics=metrics,
|
||||||
|
raw_data=headers_data
|
||||||
|
)
|
||||||
|
|
||||||
|
except httpx.TimeoutException:
|
||||||
|
return self._create_error_result(Exception("Header check timed out"))
|
||||||
|
except Exception as e:
|
||||||
|
return self._create_error_result(e)
|
||||||
|
|
||||||
|
def _fetch_headers(self, url: str) -> Dict[str, Any]:
|
||||||
|
"""Fetch headers from the URL."""
|
||||||
|
with httpx.Client(
|
||||||
|
timeout=self.timeout,
|
||||||
|
follow_redirects=True,
|
||||||
|
verify=True
|
||||||
|
) as client:
|
||||||
|
# GET request
|
||||||
|
get_response = client.get(url)
|
||||||
|
|
||||||
|
# HEAD request
|
||||||
|
head_response = client.head(url)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'url': str(get_response.url),
|
||||||
|
'status_code': get_response.status_code,
|
||||||
|
'headers': dict(get_response.headers),
# Preserve every Set-Cookie value; casting headers to a dict collapses duplicates.
'set_cookie_headers': get_response.headers.get_list('set-cookie'),
|
||||||
|
'head_headers': dict(head_response.headers),
|
||||||
|
'redirected': str(get_response.url) != url,
|
||||||
|
'redirect_history': [str(r.url) for r in get_response.history],
|
||||||
|
}
|
||||||
|
|
||||||
|
def _check_security_headers(
|
||||||
|
self,
|
||||||
|
headers: Dict[str, str]
|
||||||
|
) -> Tuple[List[IssueData], int]:
|
||||||
|
"""
|
||||||
|
Check for security headers.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (list of issues, security score 0-100)
|
||||||
|
"""
|
||||||
|
issues = []
|
||||||
|
score = 100
|
||||||
|
headers_lower = {k.lower(): v for k, v in headers.items()}
|
||||||
|
|
||||||
|
for header_name, config in SECURITY_HEADERS.items():
|
||||||
|
header_key = header_name.lower()
|
||||||
|
|
||||||
|
if header_key not in headers_lower:
|
||||||
|
# Missing header
|
||||||
|
severity = config['severity']
|
||||||
|
deduction = {'critical': 20, 'high': 15, 'medium': 10, 'low': 5, 'info': 2}
|
||||||
|
score -= deduction.get(severity, 5)
|
||||||
|
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='headers',
|
||||||
|
severity=severity,
|
||||||
|
title=f'Missing security header: {header_name}',
|
||||||
|
description=config['description'],
|
||||||
|
tool='header_check',
|
||||||
|
remediation=config['remediation'],
|
||||||
|
raw_data={'header': header_name, 'status': 'missing'}
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
# Header present, check value
|
||||||
|
value = headers_lower[header_key]
|
||||||
|
check_func = config.get('check_value')
|
||||||
|
|
||||||
|
if check_func and not check_func(value):
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='headers',
|
||||||
|
severity='low',
|
||||||
|
title=f'Weak configuration: {header_name}',
|
||||||
|
description=(
|
||||||
|
f"{config['description']} "
|
||||||
|
f"Current value may not provide optimal protection: {value}"
|
||||||
|
),
|
||||||
|
tool='header_check',
|
||||||
|
remediation=config['remediation'],
|
||||||
|
raw_data={'header': header_name, 'value': value, 'status': 'weak'}
|
||||||
|
))
|
||||||
|
score -= 3
|
||||||
|
|
||||||
|
return issues, max(0, score)
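# Worked example of the scoring above (hypothetical response): missing HSTS
# (high, -15), missing CSP (high, -15) and missing X-Frame-Options
# (medium, -10), plus one weakly configured header (-3), gives 100 - 43 = 57.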
|
||||||
|
|
||||||
|
def _check_cors(self, headers: Dict[str, str], url: str) -> List[IssueData]:
|
||||||
|
"""Check CORS configuration for issues."""
|
||||||
|
issues = []
|
||||||
|
headers_lower = {k.lower(): v for k, v in headers.items()}
|
||||||
|
|
||||||
|
acao = headers_lower.get('access-control-allow-origin', '')
|
||||||
|
acac = headers_lower.get('access-control-allow-credentials', '')
|
||||||
|
|
||||||
|
if acao == '*':
|
||||||
|
if acac.lower() == 'true':
|
||||||
|
# Critical: credentials with wildcard
|
||||||
|
check = CORS_CHECKS['credentials_with_wildcard']
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='cors',
|
||||||
|
severity=check['severity'],
|
||||||
|
title=check['title'],
|
||||||
|
description=check['description'],
|
||||||
|
tool='header_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=check['remediation'],
|
||||||
|
raw_data={
|
||||||
|
'Access-Control-Allow-Origin': acao,
|
||||||
|
'Access-Control-Allow-Credentials': acac
|
||||||
|
}
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
# Warning: permissive origin
|
||||||
|
check = CORS_CHECKS['permissive_origin']
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='cors',
|
||||||
|
severity='medium', # Lower severity without credentials
|
||||||
|
title=check['title'],
|
||||||
|
description=check['description'],
|
||||||
|
tool='header_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=check['remediation'],
|
||||||
|
raw_data={'Access-Control-Allow-Origin': acao}
|
||||||
|
))
|
||||||
|
|
||||||
|
return issues
|
||||||
|
|
||||||
|
def _check_cookies(self, headers_data: Dict[str, Any], url: str) -> List[IssueData]:
|
||||||
|
"""Check Set-Cookie headers for security flags."""
|
||||||
|
issues = []
|
||||||
|
# Use the full Set-Cookie list captured in _fetch_headers; reading it from
# a plain header dict would merge multiple Set-Cookie values into one string.
set_cookies = headers_data.get('set_cookie_headers', [])
|
||||||
|
|
||||||
|
is_https = url.startswith('https://')
|
||||||
|
|
||||||
|
for cookie in set_cookies:
|
||||||
|
cookie_lower = cookie.lower()
|
||||||
|
cookie_name = cookie.split('=')[0] if '=' in cookie else 'unknown'
|
||||||
|
|
||||||
|
cookie_issues = []
|
||||||
|
|
||||||
|
# Check Secure flag on HTTPS
|
||||||
|
if is_https and 'secure' not in cookie_lower:
|
||||||
|
cookie_issues.append({
|
||||||
|
'flag': 'Secure',
|
||||||
|
'description': (
|
||||||
|
'Cookie is set without Secure flag on HTTPS site. '
|
||||||
|
'This allows the cookie to be sent over unencrypted connections.'
|
||||||
|
),
|
||||||
|
'severity': 'high'
|
||||||
|
})
|
||||||
|
|
||||||
|
# Check HttpOnly flag (important for session cookies)
|
||||||
|
if 'httponly' not in cookie_lower:
|
||||||
|
# Check if it might be a session cookie
|
||||||
|
if any(term in cookie_name.lower() for term in ['session', 'auth', 'token', 'user']):
|
||||||
|
cookie_issues.append({
|
||||||
|
'flag': 'HttpOnly',
|
||||||
|
'description': (
|
||||||
|
'Session-like cookie is set without HttpOnly flag. '
|
||||||
|
'This allows JavaScript access, increasing XSS risk.'
|
||||||
|
),
|
||||||
|
'severity': 'high'
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
cookie_issues.append({
|
||||||
|
'flag': 'HttpOnly',
|
||||||
|
'description': (
|
||||||
|
'Cookie is set without HttpOnly flag. '
|
||||||
|
'Consider adding it unless JavaScript needs access.'
|
||||||
|
),
|
||||||
|
'severity': 'low'
|
||||||
|
})
|
||||||
|
|
||||||
|
# Check SameSite attribute
|
||||||
|
if 'samesite' not in cookie_lower:
|
||||||
|
cookie_issues.append({
|
||||||
|
'flag': 'SameSite',
|
||||||
|
'description': (
|
||||||
|
'Cookie is set without SameSite attribute. '
|
||||||
|
'This can enable CSRF attacks in some scenarios.'
|
||||||
|
),
|
||||||
|
'severity': 'medium'
|
||||||
|
})
|
||||||
|
elif 'samesite=none' in cookie_lower and 'secure' not in cookie_lower:
|
||||||
|
cookie_issues.append({
|
||||||
|
'flag': 'SameSite=None without Secure',
|
||||||
|
'description': (
|
||||||
|
'Cookie has SameSite=None but no Secure flag. '
|
||||||
|
'Modern browsers will reject this cookie.'
|
||||||
|
),
|
||||||
|
'severity': 'medium'
|
||||||
|
})
|
||||||
|
|
||||||
|
# Create issues for this cookie
|
||||||
|
for ci in cookie_issues:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='security',
|
||||||
|
severity=ci['severity'],
|
||||||
|
title=f"Cookie '{cookie_name}' missing {ci['flag']} flag",
|
||||||
|
description=ci['description'],
|
||||||
|
tool='header_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
f"Add the {ci['flag']} flag to the Set-Cookie header. "
|
||||||
|
f"Example: Set-Cookie: {cookie_name}=value; Secure; HttpOnly; SameSite=Strict"
|
||||||
|
),
|
||||||
|
raw_data={'cookie': cookie[:200]} # Truncate for storage
|
||||||
|
))
|
||||||
|
|
||||||
|
return issues
|
||||||
|
|
@ -0,0 +1,323 @@
|
||||||
|
"""
|
||||||
|
Lighthouse Scanner Integration.
|
||||||
|
|
||||||
|
This module integrates with Google Lighthouse to measure
|
||||||
|
performance, accessibility, SEO, and best practices.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from .base import (
|
||||||
|
BaseScanner,
|
||||||
|
ScannerResult,
|
||||||
|
ScannerStatus,
|
||||||
|
IssueData,
|
||||||
|
MetricData,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class LighthouseScanner(BaseScanner):
|
||||||
|
"""
|
||||||
|
Scanner that uses Google Lighthouse for performance analysis.
|
||||||
|
|
||||||
|
Communicates with the Lighthouse service container via HTTP API.
|
||||||
|
Collects performance metrics, Core Web Vitals, and various audits.
|
||||||
|
"""
|
||||||
|
|
||||||
|
name = "lighthouse"
|
||||||
|
|
||||||
|
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||||
|
super().__init__(config)
|
||||||
|
self.service_url = self.config.get(
|
||||||
|
'service_url',
|
||||||
|
'http://lighthouse:3001'
|
||||||
|
)
|
||||||
|
self.timeout = self.config.get('timeout', 120)
|
||||||
|
|
||||||
|
def is_available(self) -> bool:
|
||||||
|
"""Check if Lighthouse service is available."""
|
||||||
|
try:
|
||||||
|
with httpx.Client(timeout=5) as client:
|
||||||
|
response = client.get(f"{self.service_url}/health")
|
||||||
|
return response.status_code == 200
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Lighthouse service not available: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def run(self, url: str) -> ScannerResult:
|
||||||
|
"""
|
||||||
|
Run Lighthouse scan against the URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to analyze
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ScannerResult with performance metrics and issues
|
||||||
|
"""
|
||||||
|
self.logger.info(f"Starting Lighthouse scan for {url}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
with httpx.Client(timeout=self.timeout) as client:
|
||||||
|
response = client.post(
|
||||||
|
f"{self.service_url}/scan",
|
||||||
|
json={"url": url}
|
||||||
|
)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
|
return self._parse_results(url, data)
|
||||||
|
|
||||||
|
except httpx.TimeoutException:
|
||||||
|
return self._create_error_result(
|
||||||
|
Exception("Lighthouse scan timed out")
|
||||||
|
)
|
||||||
|
except httpx.HTTPStatusError as e:
|
||||||
|
return self._create_error_result(
|
||||||
|
Exception(f"Lighthouse service error: {e.response.status_code}")
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
return self._create_error_result(e)
|
||||||
|
|
||||||
|
def _parse_results(self, url: str, data: Dict[str, Any]) -> ScannerResult:
|
||||||
|
"""
|
||||||
|
Parse Lighthouse results into ScannerResult format.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The scanned URL
|
||||||
|
data: Raw Lighthouse response data
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Parsed ScannerResult
|
||||||
|
"""
|
||||||
|
issues = []
|
||||||
|
metrics = []
|
||||||
|
|
||||||
|
# Extract scores
|
||||||
|
scores = data.get('scores', {})
|
||||||
|
|
||||||
|
# Extract and create metrics
|
||||||
|
raw_metrics = data.get('metrics', {})
|
||||||
|
|
||||||
|
# Core Web Vitals
|
||||||
|
metric_mappings = [
|
||||||
|
('firstContentfulPaint', 'First Contentful Paint', 'ms'),
|
||||||
|
('largestContentfulPaint', 'Largest Contentful Paint', 'ms'),
|
||||||
|
('speedIndex', 'Speed Index', 'ms'),
|
||||||
|
('timeToInteractive', 'Time to Interactive', 'ms'),
|
||||||
|
('totalBlockingTime', 'Total Blocking Time', 'ms'),
|
||||||
|
('cumulativeLayoutShift', 'Cumulative Layout Shift', 'score'),
|
||||||
|
]
|
||||||
|
|
||||||
|
for key, display_name, unit in metric_mappings:
|
||||||
|
metric_data = raw_metrics.get(key, {})
|
||||||
|
if metric_data and metric_data.get('value') is not None:
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name=self._to_snake_case(key),
|
||||||
|
display_name=display_name,
|
||||||
|
value=metric_data['value'],
|
||||||
|
unit=unit,
|
||||||
|
source='lighthouse',
|
||||||
|
score=metric_data.get('score')
|
||||||
|
))
|
||||||
|
|
||||||
|
# Resource metrics
|
||||||
|
resources = data.get('resources', {})
|
||||||
|
diagnostics = data.get('diagnostics', {})
|
||||||
|
|
||||||
|
if resources.get('totalByteWeight'):
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='total_byte_weight',
|
||||||
|
display_name='Total Page Weight',
|
||||||
|
value=resources['totalByteWeight'],
|
||||||
|
unit='bytes',
|
||||||
|
source='lighthouse'
|
||||||
|
))
|
||||||
|
|
||||||
|
if resources.get('bootupTime'):
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='javascript_bootup_time',
|
||||||
|
display_name='JavaScript Boot-up Time',
|
||||||
|
value=resources['bootupTime'],
|
||||||
|
unit='ms',
|
||||||
|
source='lighthouse'
|
||||||
|
))
|
||||||
|
|
||||||
|
if diagnostics.get('numRequests'):
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='total_requests',
|
||||||
|
display_name='Total Network Requests',
|
||||||
|
value=float(diagnostics['numRequests']),
|
||||||
|
unit='count',
|
||||||
|
source='lighthouse'
|
||||||
|
))
|
||||||
|
|
||||||
|
# Extract issues from failed audits
|
||||||
|
raw_issues = data.get('issues', [])
|
||||||
|
for issue in raw_issues:
|
||||||
|
severity = self._score_to_severity(issue.get('score', 0.5))
|
||||||
|
category = self._map_category(issue.get('category', 'performance'))
|
||||||
|
|
||||||
|
issues.append(IssueData(
|
||||||
|
category=category,
|
||||||
|
severity=severity,
|
||||||
|
title=issue.get('title', 'Unknown issue'),
|
||||||
|
description=issue.get('description', ''),
|
||||||
|
tool='lighthouse',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=self._get_remediation(issue.get('id')),
|
||||||
|
raw_data=issue
|
||||||
|
))
|
||||||
|
|
||||||
|
# Check for large bundles
|
||||||
|
large_scripts = resources.get('scriptTreemap', [])
|
||||||
|
for script in large_scripts[:5]: # Top 5 largest
|
||||||
|
if script.get('resourceBytes', 0) > settings.SCANNER_CONFIG.get(
|
||||||
|
'LARGE_JS_BUNDLE_THRESHOLD_BYTES', 500 * 1024
|
||||||
|
):
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='resources',
|
||||||
|
severity='medium',
|
||||||
|
title=f"Large JavaScript bundle detected",
|
||||||
|
description=(
|
||||||
|
f"The script '{script.get('name', 'Unknown')}' "
|
||||||
|
f"is {script['resourceBytes'] / 1024:.1f} KB. "
|
||||||
|
"Large bundles can slow down page load and increase memory usage."
|
||||||
|
),
|
||||||
|
tool='lighthouse',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
"Consider code splitting, tree shaking, or lazy loading "
|
||||||
|
"to reduce bundle size."
|
||||||
|
),
|
||||||
|
raw_data=script
|
||||||
|
))
|
||||||
|
|
||||||
|
# Check for unused JavaScript
|
||||||
|
unused_js = resources.get('unusedJavascript', [])
|
||||||
|
if unused_js:
|
||||||
|
total_wasted = sum(u.get('wastedBytes', 0) for u in unused_js)
|
||||||
|
if total_wasted > 100 * 1024: # More than 100KB unused
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='performance',
|
||||||
|
severity='medium',
|
||||||
|
title="Significant unused JavaScript detected",
|
||||||
|
description=(
|
||||||
|
f"Found {total_wasted / 1024:.1f} KB of unused JavaScript "
|
||||||
|
f"across {len(unused_js)} resources. This increases page "
|
||||||
|
"load time and memory usage."
|
||||||
|
),
|
||||||
|
tool='lighthouse',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
"Remove unused code or use code splitting to load "
|
||||||
|
"JavaScript only when needed."
|
||||||
|
),
|
||||||
|
raw_data={'unused_resources': unused_js}
|
||||||
|
))
|
||||||
|
|
||||||
|
# Check for render-blocking resources
|
||||||
|
blocking = resources.get('renderBlockingResources', [])
|
||||||
|
if blocking:
|
||||||
|
total_wasted_ms = sum(r.get('wastedMs', 0) for r in blocking)
|
||||||
|
if total_wasted_ms > 500:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='performance',
|
||||||
|
severity='medium',
|
||||||
|
title="Render-blocking resources detected",
|
||||||
|
description=(
|
||||||
|
f"Found {len(blocking)} render-blocking resources "
|
||||||
|
f"adding approximately {total_wasted_ms:.0f}ms to page load. "
|
||||||
|
"These resources delay first paint."
|
||||||
|
),
|
||||||
|
tool='lighthouse',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
"Consider inlining critical CSS, deferring non-critical JS, "
|
||||||
|
"or using async/defer attributes."
|
||||||
|
),
|
||||||
|
raw_data={'blocking_resources': blocking}
|
||||||
|
))
|
||||||
|
|
||||||
|
self.logger.info(
|
||||||
|
f"Lighthouse scan complete: {len(issues)} issues, {len(metrics)} metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
return ScannerResult(
|
||||||
|
scanner_name=self.name,
|
||||||
|
status=ScannerStatus.SUCCESS,
|
||||||
|
issues=issues,
|
||||||
|
metrics=metrics,
|
||||||
|
scores={
|
||||||
|
'performance': scores.get('performance', 0),
|
||||||
|
'accessibility': scores.get('accessibility', 0),
|
||||||
|
'best_practices': scores.get('bestPractices', 0),
|
||||||
|
'seo': scores.get('seo', 0),
|
||||||
|
},
|
||||||
|
raw_data=data
|
||||||
|
)
|
||||||
|
|
||||||
|
def _to_snake_case(self, name: str) -> str:
|
||||||
|
"""Convert camelCase to snake_case."""
|
||||||
|
import re
|
||||||
|
s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
|
||||||
|
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
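# e.g. 'largestContentfulPaint' -> 'largest_contentful_paint',
#      'cumulativeLayoutShift'  -> 'cumulative_layout_shift'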
|
||||||
|
|
||||||
|
def _score_to_severity(self, score: float) -> str:
|
||||||
|
"""Convert Lighthouse score to severity level."""
|
||||||
|
if score is None:
|
||||||
|
return 'info'
|
||||||
|
elif score < 0.25:
|
||||||
|
return 'high'
|
||||||
|
elif score < 0.5:
|
||||||
|
return 'medium'
|
||||||
|
elif score < 0.75:
|
||||||
|
return 'low'
|
||||||
|
else:
|
||||||
|
return 'info'
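# e.g. an audit score of 0.2 maps to 'high', 0.4 to 'medium', 0.6 to 'low',
# and anything from 0.75 upwards (or a missing score) to 'info'.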
|
||||||
|
|
||||||
|
def _map_category(self, lighthouse_category: str) -> str:
|
||||||
|
"""Map Lighthouse category to our category."""
|
||||||
|
mapping = {
|
||||||
|
'performance': 'performance',
|
||||||
|
'accessibility': 'accessibility',
|
||||||
|
'best-practices': 'best_practices',
|
||||||
|
'seo': 'seo',
|
||||||
|
}
|
||||||
|
return mapping.get(lighthouse_category, 'performance')
|
||||||
|
|
||||||
|
def _get_remediation(self, audit_id: str) -> str:
|
||||||
|
"""Get remediation text for known audit IDs."""
|
||||||
|
remediations = {
|
||||||
|
'first-contentful-paint': (
|
||||||
|
"Reduce server response time, eliminate render-blocking resources, "
|
||||||
|
"and optimize critical rendering path."
|
||||||
|
),
|
||||||
|
'largest-contentful-paint': (
|
||||||
|
"Optimize images, preload critical resources, and reduce server "
|
||||||
|
"response time."
|
||||||
|
),
|
||||||
|
'total-blocking-time': (
|
||||||
|
"Reduce JavaScript execution time by breaking up long tasks, "
|
||||||
|
"removing unused code, and minimizing main thread work."
|
||||||
|
),
|
||||||
|
'cumulative-layout-shift': (
|
||||||
|
"Always include size attributes on images and videos, reserve space "
|
||||||
|
"for ad slots, and avoid inserting content above existing content."
|
||||||
|
),
|
||||||
|
'speed-index': (
|
||||||
|
"Minimize main thread work, reduce JavaScript execution time, "
|
||||||
|
"and ensure text remains visible during webfont load."
|
||||||
|
),
|
||||||
|
'interactive': (
|
||||||
|
"Reduce JavaScript payload, defer non-critical scripts, and "
|
||||||
|
"minimize main thread work."
|
||||||
|
),
|
||||||
|
}
|
||||||
|
return remediations.get(audit_id, "Review and optimize based on the audit details.")
|
||||||
|
|
@ -0,0 +1,397 @@
|
||||||
|
"""
|
||||||
|
Playwright Scanner Integration.
|
||||||
|
|
||||||
|
This module uses Playwright to perform browser-based analysis,
|
||||||
|
capturing console errors, network requests, and resource metrics.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from .base import (
|
||||||
|
BaseScanner,
|
||||||
|
ScannerResult,
|
||||||
|
ScannerStatus,
|
||||||
|
IssueData,
|
||||||
|
MetricData,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class PlaywrightScanner(BaseScanner):
|
||||||
|
"""
|
||||||
|
Scanner using Playwright for browser-based analysis.
|
||||||
|
|
||||||
|
Captures:
|
||||||
|
- Console errors and warnings
|
||||||
|
- Network request details
|
||||||
|
- Page load timing
|
||||||
|
- Large resources (images, scripts)
|
||||||
|
- Memory usage indicators
|
||||||
|
"""
|
||||||
|
|
||||||
|
name = "playwright"
|
||||||
|
|
||||||
|
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||||
|
super().__init__(config)
|
||||||
|
self.timeout = self.config.get('timeout', 30000) # 30 seconds
|
||||||
|
self.viewport = self.config.get('viewport', {'width': 1920, 'height': 1080})
|
||||||
|
|
||||||
|
def run(self, url: str) -> ScannerResult:
|
||||||
|
"""
|
||||||
|
Run Playwright analysis on the URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to analyze
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ScannerResult with browser analysis data
|
||||||
|
"""
|
||||||
|
self.logger.info(f"Starting Playwright scan for {url}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Run async scan in sync context
|
||||||
|
loop = asyncio.new_event_loop()
|
||||||
|
asyncio.set_event_loop(loop)
|
||||||
|
try:
|
||||||
|
result = loop.run_until_complete(self._async_scan(url))
|
||||||
|
finally:
|
||||||
|
loop.close()
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
return self._create_error_result(e)
|
||||||
|
|
||||||
|
async def _async_scan(self, url: str) -> ScannerResult:
|
||||||
|
"""
|
||||||
|
Async implementation of the scan.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to analyze
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ScannerResult with findings
|
||||||
|
"""
|
||||||
|
from playwright.async_api import async_playwright
|
||||||
|
|
||||||
|
issues = []
|
||||||
|
metrics = []
|
||||||
|
raw_data = {
|
||||||
|
'console_messages': [],
|
||||||
|
'network_requests': [],
|
||||||
|
'failed_requests': [],
|
||||||
|
'large_resources': [],
|
||||||
|
}
|
||||||
|
|
||||||
|
async with async_playwright() as p:
|
||||||
|
browser = await p.chromium.launch(
|
||||||
|
headless=True,
|
||||||
|
args=[
|
||||||
|
'--no-sandbox',
|
||||||
|
'--disable-setuid-sandbox',
|
||||||
|
'--disable-dev-shm-usage',
|
||||||
|
'--disable-gpu',
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
context = await browser.new_context(
|
||||||
|
viewport=self.viewport,
|
||||||
|
user_agent=(
|
||||||
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
|
||||||
|
'(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
page = await context.new_page()
|
||||||
|
|
||||||
|
# Collect data
|
||||||
|
console_messages = []
|
||||||
|
network_requests = []
|
||||||
|
failed_requests = []
|
||||||
|
|
||||||
|
# Set up event listeners
|
||||||
|
page.on("console", lambda msg: console_messages.append({
|
||||||
|
'type': msg.type,
|
||||||
|
'text': msg.text,
|
||||||
|
'location': str(msg.location) if msg.location else None,
|
||||||
|
}))
|
||||||
|
|
||||||
|
page.on("request", lambda req: network_requests.append({
|
||||||
|
'url': req.url,
|
||||||
|
'method': req.method,
|
||||||
|
'resource_type': req.resource_type,
|
||||||
|
'timestamp': time.time(),
|
||||||
|
}))
|
||||||
|
|
||||||
|
page.on("requestfailed", lambda req: failed_requests.append({
|
||||||
|
'url': req.url,
|
||||||
|
'failure': req.failure,
|
||||||
|
'resource_type': req.resource_type,
|
||||||
|
}))
|
||||||
|
|
||||||
|
# Navigate and measure
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = await page.goto(
|
||||||
|
url,
|
||||||
|
wait_until='networkidle',
|
||||||
|
timeout=self.timeout
|
||||||
|
)
|
||||||
|
load_time = (time.time() - start_time) * 1000 # Convert to ms
|
||||||
|
|
||||||
|
# Get response status
|
||||||
|
status_code = response.status if response else 0
|
||||||
|
|
||||||
|
# Wait a bit more for any delayed scripts
|
||||||
|
await page.wait_for_timeout(2000)
|
||||||
|
|
||||||
|
# Get performance timing
|
||||||
|
perf_timing = await page.evaluate('''() => {
|
||||||
|
const timing = performance.timing;
|
||||||
|
const navigation = performance.getEntriesByType("navigation")[0];
|
||||||
|
return {
|
||||||
|
domContentLoaded: timing.domContentLoadedEventEnd - timing.navigationStart,
|
||||||
|
domComplete: timing.domComplete - timing.navigationStart,
|
||||||
|
loadEvent: timing.loadEventEnd - timing.navigationStart,
|
||||||
|
firstPaint: (performance.getEntriesByType("paint").find(e => e.name === "first-paint") || {}).startTime || null,
|
||||||
|
transferSize: navigation ? navigation.transferSize : null,
|
||||||
|
};
|
||||||
|
}''')
|
||||||
|
|
||||||
|
# Get memory info (if available)
|
||||||
|
memory_info = await page.evaluate('''() => {
|
||||||
|
if (performance.memory) {
|
||||||
|
return {
|
||||||
|
usedJSHeapSize: performance.memory.usedJSHeapSize,
|
||||||
|
totalJSHeapSize: performance.memory.totalJSHeapSize,
|
||||||
|
jsHeapSizeLimit: performance.memory.jsHeapSizeLimit,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}''')
|
||||||
|
|
||||||
|
# Get resource sizes
|
||||||
|
resources = await page.evaluate('''() => {
|
||||||
|
const entries = performance.getEntriesByType("resource");
|
||||||
|
return entries.map(e => ({
|
||||||
|
name: e.name,
|
||||||
|
type: e.initiatorType,
|
||||||
|
transferSize: e.transferSize,
|
||||||
|
duration: e.duration,
|
||||||
|
}));
|
||||||
|
}''')
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Page navigation error: {e}")
|
||||||
|
load_time = self.timeout
|
||||||
|
status_code = 0
|
||||||
|
perf_timing = {}
|
||||||
|
memory_info = None
|
||||||
|
resources = []
|
||||||
|
|
||||||
|
await browser.close()
|
||||||
|
|
||||||
|
# Process collected data
|
||||||
|
raw_data['console_messages'] = console_messages
|
||||||
|
raw_data['network_requests'] = network_requests[:100] # Limit stored
|
||||||
|
raw_data['failed_requests'] = failed_requests
|
||||||
|
raw_data['performance_timing'] = perf_timing
raw_data['memory_info'] = memory_info
raw_data['status_code'] = status_code
|
||||||
|
|
||||||
|
# Create metrics
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='page_load_time',
|
||||||
|
display_name='Page Load Time',
|
||||||
|
value=load_time,
|
||||||
|
unit='ms',
|
||||||
|
source='playwright'
|
||||||
|
))
|
||||||
|
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='total_network_requests',
|
||||||
|
display_name='Total Network Requests',
|
||||||
|
value=float(len(network_requests)),
|
||||||
|
unit='count',
|
||||||
|
source='playwright'
|
||||||
|
))
|
||||||
|
|
||||||
|
# Calculate total transfer size
|
||||||
|
total_transfer = sum(r.get('transferSize', 0) for r in resources if r.get('transferSize'))
|
||||||
|
if total_transfer > 0:
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='total_transfer_size',
|
||||||
|
display_name='Total Transfer Size',
|
||||||
|
value=float(total_transfer),
|
||||||
|
unit='bytes',
|
||||||
|
source='playwright'
|
||||||
|
))
|
||||||
|
|
||||||
|
if perf_timing.get('domContentLoaded'):
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='dom_content_loaded',
|
||||||
|
display_name='DOM Content Loaded',
|
||||||
|
value=float(perf_timing['domContentLoaded']),
|
||||||
|
unit='ms',
|
||||||
|
source='playwright'
|
||||||
|
))
|
||||||
|
|
||||||
|
# Memory metrics
|
||||||
|
if memory_info:
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='js_heap_used',
|
||||||
|
display_name='JS Heap Used',
|
||||||
|
value=float(memory_info.get('usedJSHeapSize', 0)),
|
||||||
|
unit='bytes',
|
||||||
|
source='playwright'
|
||||||
|
))
|
||||||
|
|
||||||
|
# Check for high memory usage
|
||||||
|
heap_used = memory_info.get('usedJSHeapSize', 0)
|
||||||
|
heap_limit = memory_info.get('jsHeapSizeLimit', 1)
|
||||||
|
heap_percent = (heap_used / heap_limit) * 100 if heap_limit > 0 else 0
|
||||||
|
|
||||||
|
if heap_percent > 50:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='resources',
|
||||||
|
severity='medium',
|
||||||
|
title='High JavaScript memory usage',
|
||||||
|
description=(
|
||||||
|
f'JavaScript is using {heap_used / (1024*1024):.1f} MB '
|
||||||
|
f'({heap_percent:.1f}% of available heap). '
|
||||||
|
'This may indicate memory-heavy operations or potential leaks.'
|
||||||
|
),
|
||||||
|
tool='playwright',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
'Review JavaScript for memory leaks, optimize data structures, '
|
||||||
|
'and ensure proper cleanup of event listeners and timers.'
|
||||||
|
),
|
||||||
|
raw_data=memory_info
|
||||||
|
))
|
||||||
|
|
||||||
|
# Analyze console messages for errors
|
||||||
|
errors = [m for m in console_messages if m['type'] == 'error']
|
||||||
|
warnings = [m for m in console_messages if m['type'] == 'warning']
|
||||||
|
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='console_errors_count',
|
||||||
|
display_name='Console Errors',
|
||||||
|
value=float(len(errors)),
|
||||||
|
unit='count',
|
||||||
|
source='playwright'
|
||||||
|
))
|
||||||
|
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='console_warnings_count',
|
||||||
|
display_name='Console Warnings',
|
||||||
|
value=float(len(warnings)),
|
||||||
|
unit='count',
|
||||||
|
source='playwright'
|
||||||
|
))
|
||||||
|
|
||||||
|
# Create issues for console errors
|
||||||
|
if errors:
|
||||||
|
# Group similar errors
|
||||||
|
error_texts = set(e['text'][:200] for e in errors)
|
||||||
|
for error_text in list(error_texts)[:10]: # Limit to 10 unique errors
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='content',
|
||||||
|
severity='medium',
|
||||||
|
title='JavaScript console error',
|
||||||
|
description=f'JavaScript error logged to console: {error_text}',
|
||||||
|
tool='playwright',
|
||||||
|
affected_url=url,
|
||||||
|
remediation='Review and fix the JavaScript error in your code.',
|
||||||
|
raw_data={'error': error_text}
|
||||||
|
))
|
||||||
|
|
||||||
|
# Check for failed network requests
|
||||||
|
if failed_requests:
|
||||||
|
for req in failed_requests[:5]: # Limit reported
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='content',
|
||||||
|
severity='low',
|
||||||
|
title='Failed network request',
|
||||||
|
description=(
|
||||||
|
f"Request to {req['url'][:100]} failed: {req.get('failure', 'Unknown error')}"
|
||||||
|
),
|
||||||
|
tool='playwright',
|
||||||
|
affected_url=req['url'],
|
||||||
|
remediation='Ensure the resource is available and CORS is configured correctly.',
|
||||||
|
raw_data=req
|
||||||
|
))
|
||||||
|
|
||||||
|
# Find large resources
|
||||||
|
large_threshold = settings.SCANNER_CONFIG.get('LARGE_IMAGE_THRESHOLD_BYTES', 1024 * 1024)
|
||||||
|
large_resources = [
|
||||||
|
r for r in resources
|
||||||
|
if r.get('transferSize', 0) > large_threshold
|
||||||
|
]
|
||||||
|
|
||||||
|
for resource in large_resources[:5]: # Limit reported
|
||||||
|
size_mb = resource['transferSize'] / (1024 * 1024)
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='resources',
|
||||||
|
severity='medium' if size_mb > 2 else 'low',
|
||||||
|
title=f"Large resource detected ({size_mb:.1f} MB)",
|
||||||
|
description=(
|
||||||
|
f"The resource '{resource['name'][-80:]}' is {size_mb:.2f} MB. "
|
||||||
|
"Large resources increase page load time and bandwidth usage."
|
||||||
|
),
|
||||||
|
tool='playwright',
|
||||||
|
affected_url=resource['name'],
|
||||||
|
remediation=(
|
||||||
|
'Optimize images using compression, use appropriate formats (WebP, AVIF), '
|
||||||
|
'implement lazy loading, or consider a CDN.'
|
||||||
|
),
|
||||||
|
raw_data=resource
|
||||||
|
))
|
||||||
|
|
||||||
|
raw_data['large_resources'] = large_resources
|
||||||
|
|
||||||
|
# Count resources by type
|
||||||
|
resource_counts = {}
|
||||||
|
for req in network_requests:
|
||||||
|
rtype = req.get('resource_type', 'other')
|
||||||
|
resource_counts[rtype] = resource_counts.get(rtype, 0) + 1
|
||||||
|
|
||||||
|
raw_data['resource_counts'] = resource_counts
|
||||||
|
|
||||||
|
# Check for excessive requests
|
||||||
|
if len(network_requests) > 100:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='performance',
|
||||||
|
severity='medium',
|
||||||
|
title='High number of network requests',
|
||||||
|
description=(
|
||||||
|
f'Page made {len(network_requests)} network requests. '
|
||||||
|
'Excessive requests increase page load time and server load.'
|
||||||
|
),
|
||||||
|
tool='playwright',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
'Consolidate resources, use HTTP/2 multiplexing, implement '
|
||||||
|
'resource bundling, and lazy load non-critical resources.'
|
||||||
|
),
|
||||||
|
raw_data=resource_counts
|
||||||
|
))
|
||||||
|
|
||||||
|
self.logger.info(
|
||||||
|
f"Playwright scan complete: {len(issues)} issues, {len(metrics)} metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
return ScannerResult(
|
||||||
|
scanner_name=self.name,
|
||||||
|
status=ScannerStatus.SUCCESS,
|
||||||
|
issues=issues,
|
||||||
|
metrics=metrics,
|
||||||
|
raw_data=raw_data
|
||||||
|
)
|
||||||
|
|
@ -0,0 +1,314 @@
|
||||||
|
"""
|
||||||
|
Scan Runner - Orchestrates multiple scanners.
|
||||||
|
|
||||||
|
This module coordinates running all enabled scanners against a URL
|
||||||
|
and aggregates their results into a unified report.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
from typing import Any, Dict, List, Optional, Type
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from .base import BaseScanner, ScannerResult, ScannerStatus
|
||||||
|
from .lighthouse import LighthouseScanner
|
||||||
|
from .playwright_scanner import PlaywrightScanner
|
||||||
|
from .zap import ZAPScanner
|
||||||
|
from .headers import HeaderScanner
|
||||||
|
from .tls import TLSScanner
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Default scanner classes to run
|
||||||
|
DEFAULT_SCANNERS: List[Type[BaseScanner]] = [
|
||||||
|
LighthouseScanner,
|
||||||
|
PlaywrightScanner,
|
||||||
|
ZAPScanner,
|
||||||
|
HeaderScanner,
|
||||||
|
TLSScanner,
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class ScanRunner:
|
||||||
|
"""
|
||||||
|
Orchestrates running multiple scanners and aggregating results.
|
||||||
|
|
||||||
|
This class manages:
|
||||||
|
- Running enabled scanners in parallel or sequence
|
||||||
|
- Aggregating results from all scanners
|
||||||
|
- Error handling and partial result compilation
|
||||||
|
- Timeout management
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
scanner_classes: Optional[List[Type[BaseScanner]]] = None,
|
||||||
|
config: Optional[Dict[str, Any]] = None,
|
||||||
|
max_workers: int = 3
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Initialize the scan runner.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
scanner_classes: List of scanner classes to use (defaults to all)
|
||||||
|
config: Configuration dict passed to each scanner
|
||||||
|
max_workers: Maximum concurrent scanner threads
|
||||||
|
"""
|
||||||
|
self.scanner_classes = scanner_classes or DEFAULT_SCANNERS
|
||||||
|
self.config = config or {}
|
||||||
|
self.max_workers = max_workers
|
||||||
|
self.logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
def run(self, url: str, parallel: bool = True) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Run all scanners against the URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to scan
|
||||||
|
parallel: Whether to run scanners in parallel
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Aggregated results dictionary containing:
|
||||||
|
- status: Overall scan status
|
||||||
|
- scores: Aggregated scores
|
||||||
|
- issues: All issues from all scanners
|
||||||
|
- metrics: All metrics from all scanners
|
||||||
|
- scanner_results: Individual scanner results
|
||||||
|
- errors: Any scanner errors
|
||||||
|
"""
|
||||||
|
self.logger.info(f"Starting scan runner for {url} with {len(self.scanner_classes)} scanners")
|
||||||
|
|
||||||
|
# Initialize scanners
|
||||||
|
scanners = self._initialize_scanners()
|
||||||
|
|
||||||
|
# Run scanners
|
||||||
|
if parallel:
|
||||||
|
results = self._run_parallel(scanners, url)
|
||||||
|
else:
|
||||||
|
results = self._run_sequential(scanners, url)
|
||||||
|
|
||||||
|
# Aggregate results
|
||||||
|
aggregated = self._aggregate_results(results)
|
||||||
|
|
||||||
|
self.logger.info(
|
||||||
|
f"Scan complete: {len(aggregated['issues'])} issues, "
|
||||||
|
f"{len(aggregated['metrics'])} metrics, "
|
||||||
|
f"status: {aggregated['status']}"
|
||||||
|
)
|
||||||
|
|
||||||
|
return aggregated
|
||||||
|
|
||||||
|
def _initialize_scanners(self) -> List[BaseScanner]:
|
||||||
|
"""Initialize scanner instances."""
|
||||||
|
scanners = []
|
||||||
|
scanner_config = settings.SCANNER_CONFIG
|
||||||
|
|
||||||
|
for scanner_class in self.scanner_classes:
|
||||||
|
try:
|
||||||
|
# Merge default config with scanner-specific config
|
||||||
|
config = {**self.config}
|
||||||
|
|
||||||
|
# Add scanner-specific config
|
||||||
|
if scanner_class == LighthouseScanner:
|
||||||
|
config['service_url'] = 'http://lighthouse:3001'
|
||||||
|
config['timeout'] = scanner_config.get('LIGHTHOUSE_TIMEOUT', 60)
|
||||||
|
elif scanner_class == ZAPScanner:
|
||||||
|
config['zap_host'] = scanner_config.get('ZAP_HOST')
|
||||||
|
config['api_key'] = scanner_config.get('ZAP_API_KEY')
|
||||||
|
config['timeout'] = scanner_config.get('ZAP_TIMEOUT', 120)
|
||||||
|
elif scanner_class == PlaywrightScanner:
|
||||||
|
config['timeout'] = scanner_config.get('PLAYWRIGHT_TIMEOUT', 30000)
|
||||||
|
config['viewport'] = scanner_config.get('PLAYWRIGHT_VIEWPORT', {'width': 1920, 'height': 1080})
|
||||||
|
|
||||||
|
scanner = scanner_class(config=config)
|
||||||
|
scanners.append(scanner)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Failed to initialize {scanner_class.__name__}: {e}")
|
||||||
|
|
||||||
|
return scanners
|
||||||
|
|
||||||
|
def _run_parallel(
|
||||||
|
self,
|
||||||
|
scanners: List[BaseScanner],
|
||||||
|
url: str
|
||||||
|
) -> Dict[str, ScannerResult]:
|
||||||
|
"""Run scanners in parallel using thread pool."""
|
||||||
|
results = {}
|
||||||
|
|
||||||
|
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
|
||||||
|
# Submit all scanner tasks
|
||||||
|
future_to_scanner = {
|
||||||
|
executor.submit(self._run_scanner, scanner, url): scanner
|
||||||
|
for scanner in scanners
|
||||||
|
}
|
||||||
|
|
||||||
|
# Collect results as they complete
|
||||||
|
for future in as_completed(future_to_scanner):
|
||||||
|
scanner = future_to_scanner[future]
|
||||||
|
try:
|
||||||
|
result = future.result()
|
||||||
|
results[scanner.name] = result
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Scanner {scanner.name} raised exception: {e}")
|
||||||
|
results[scanner.name] = ScannerResult(
|
||||||
|
scanner_name=scanner.name,
|
||||||
|
status=ScannerStatus.FAILED,
|
||||||
|
error_message=str(e)
|
||||||
|
)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
def _run_sequential(
|
||||||
|
self,
|
||||||
|
scanners: List[BaseScanner],
|
||||||
|
url: str
|
||||||
|
) -> Dict[str, ScannerResult]:
|
||||||
|
"""Run scanners sequentially."""
|
||||||
|
results = {}
|
||||||
|
|
||||||
|
for scanner in scanners:
|
||||||
|
result = self._run_scanner(scanner, url)
|
||||||
|
results[scanner.name] = result
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
def _run_scanner(self, scanner: BaseScanner, url: str) -> ScannerResult:
|
||||||
|
"""Run a single scanner with error handling."""
|
||||||
|
self.logger.info(f"Running scanner: {scanner.name}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Check availability first
|
||||||
|
if not scanner.is_available():
|
||||||
|
self.logger.warning(f"Scanner {scanner.name} is not available")
|
||||||
|
return ScannerResult(
|
||||||
|
scanner_name=scanner.name,
|
||||||
|
status=ScannerStatus.SKIPPED,
|
||||||
|
error_message=f"{scanner.name} service is not available"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Run the scanner
|
||||||
|
result = scanner.run(url)
|
||||||
|
self.logger.info(
|
||||||
|
f"Scanner {scanner.name} completed with status: {result.status}"
|
||||||
|
)
|
||||||
|
return result
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Scanner {scanner.name} failed: {e}")
|
||||||
|
return ScannerResult(
|
||||||
|
scanner_name=scanner.name,
|
||||||
|
status=ScannerStatus.FAILED,
|
||||||
|
error_message=str(e)
|
||||||
|
)
|
||||||
|
|
||||||
|
def _aggregate_results(
|
||||||
|
self,
|
||||||
|
results: Dict[str, ScannerResult]
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Aggregate results from all scanners."""
|
||||||
|
all_issues = []
|
||||||
|
all_metrics = []
|
||||||
|
all_scores = {}
|
||||||
|
raw_data = {}
|
||||||
|
errors = []
|
||||||
|
|
||||||
|
successful_scanners = 0
|
||||||
|
failed_scanners = 0
|
||||||
|
|
||||||
|
for scanner_name, result in results.items():
|
||||||
|
# Track scanner status
|
||||||
|
if result.status == ScannerStatus.SUCCESS:
|
||||||
|
successful_scanners += 1
|
||||||
|
elif result.status == ScannerStatus.FAILED:
|
||||||
|
failed_scanners += 1
|
||||||
|
if result.error_message:
|
||||||
|
errors.append({
|
||||||
|
'scanner': scanner_name,
|
||||||
|
'error': result.error_message
|
||||||
|
})
|
||||||
|
elif result.status == ScannerStatus.PARTIAL:
|
||||||
|
successful_scanners += 1
|
||||||
|
|
||||||
|
# Collect issues
|
||||||
|
for issue in result.issues:
|
||||||
|
all_issues.append({
|
||||||
|
'category': issue.category,
|
||||||
|
'severity': issue.severity,
|
||||||
|
'title': issue.title,
|
||||||
|
'description': issue.description,
|
||||||
|
'tool': issue.tool,
|
||||||
|
'affected_url': issue.affected_url,
|
||||||
|
'remediation': issue.remediation,
|
||||||
|
'raw_data': issue.raw_data,
|
||||||
|
})
|
||||||
|
|
||||||
|
# Collect metrics
|
||||||
|
for metric in result.metrics:
|
||||||
|
all_metrics.append({
|
||||||
|
'name': metric.name,
|
||||||
|
'display_name': metric.display_name,
|
||||||
|
'value': metric.value,
|
||||||
|
'unit': metric.unit,
|
||||||
|
'source': metric.source,
|
||||||
|
'score': metric.score,
|
||||||
|
})
|
||||||
|
|
||||||
|
# Collect scores
|
||||||
|
if result.scores:
|
||||||
|
all_scores[scanner_name] = result.scores
|
||||||
|
|
||||||
|
# Store raw data
|
||||||
|
if result.raw_data:
|
||||||
|
raw_data[scanner_name] = result.raw_data
|
||||||
|
|
||||||
|
# Determine overall status
|
||||||
|
if failed_scanners == len(results):
|
||||||
|
overall_status = 'failed'
|
||||||
|
elif failed_scanners > 0:
|
||||||
|
overall_status = 'partial'
|
||||||
|
else:
|
||||||
|
overall_status = 'done'
|
||||||
|
|
||||||
|
# Calculate aggregated scores
|
||||||
|
aggregated_scores = self._calculate_aggregated_scores(all_scores)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'status': overall_status,
|
||||||
|
'scores': aggregated_scores,
|
||||||
|
'issues': all_issues,
|
||||||
|
'metrics': all_metrics,
|
||||||
|
'scanner_results': {
|
||||||
|
name: {
|
||||||
|
'status': result.status.value,
|
||||||
|
'error': result.error_message,
|
||||||
|
}
|
||||||
|
for name, result in results.items()
|
||||||
|
},
|
||||||
|
'raw_data': raw_data,
|
||||||
|
'errors': errors,
|
||||||
|
'summary': {
|
||||||
|
'total_scanners': len(results),
|
||||||
|
'successful': successful_scanners,
|
||||||
|
'failed': failed_scanners,
|
||||||
|
'total_issues': len(all_issues),
|
||||||
|
'total_metrics': len(all_metrics),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _calculate_aggregated_scores(
|
||||||
|
self,
|
||||||
|
scanner_scores: Dict[str, Dict[str, int]]
|
||||||
|
) -> Dict[str, Optional[int]]:
|
||||||
|
"""Calculate aggregated scores from all scanners."""
|
||||||
|
# Lighthouse provides the main scores
|
||||||
|
lighthouse_scores = scanner_scores.get('lighthouse', {})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'performance': lighthouse_scores.get('performance'),
|
||||||
|
'accessibility': lighthouse_scores.get('accessibility'),
|
||||||
|
'best_practices': lighthouse_scores.get('best_practices'),
|
||||||
|
'seo': lighthouse_scores.get('seo'),
|
||||||
|
}
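# Note: only Lighthouse feeds these four headline scores; if it was skipped
# or failed, every value is None. A hypothetical result might look like
# {'performance': 92, 'accessibility': 88, 'best_practices': 100, 'seo': 90}.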
|
||||||
|
|
@ -0,0 +1,380 @@
|
||||||
|
"""
|
||||||
|
TLS/SSL Security Scanner.
|
||||||
|
|
||||||
|
This module checks TLS/SSL configuration and certificate validity.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import socket
|
||||||
|
import ssl
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from .base import (
|
||||||
|
BaseScanner,
|
||||||
|
ScannerResult,
|
||||||
|
ScannerStatus,
|
||||||
|
IssueData,
|
||||||
|
MetricData,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class TLSScanner(BaseScanner):
|
||||||
|
"""
|
||||||
|
Scanner for TLS/SSL certificate and configuration.
|
||||||
|
|
||||||
|
Checks:
|
||||||
|
- Certificate validity
|
||||||
|
- Certificate expiration
|
||||||
|
- HTTPS availability
|
||||||
|
- HTTP to HTTPS redirect
|
||||||
|
"""
|
||||||
|
|
||||||
|
name = "tls_check"
|
||||||
|
|
||||||
|
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||||
|
super().__init__(config)
|
||||||
|
self.timeout = self.config.get('timeout', 10)
|
||||||
|
|
||||||
|
def run(self, url: str) -> ScannerResult:
|
||||||
|
"""
|
||||||
|
Run TLS/SSL analysis on the URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to analyze
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ScannerResult with TLS findings
|
||||||
|
"""
|
||||||
|
self.logger.info(f"Starting TLS scan for {url}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
# urlparse().hostname strips ports and userinfo; fall back for odd inputs
hostname = parsed.hostname or parsed.netloc.split(':')[0]
|
||||||
|
port = parsed.port or (443 if parsed.scheme == 'https' else 80)
|
||||||
|
|
||||||
|
issues = []
|
||||||
|
metrics = []
|
||||||
|
raw_data = {}
|
||||||
|
|
||||||
|
# Check if site is HTTPS
|
||||||
|
if parsed.scheme == 'http':
|
||||||
|
# Check if HTTPS is available
|
||||||
|
https_available, https_result = self._check_https_available(hostname)
|
||||||
|
raw_data['https_available'] = https_available
|
||||||
|
raw_data['https_check'] = https_result
|
||||||
|
|
||||||
|
if https_available:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='tls',
|
||||||
|
severity='high',
|
||||||
|
title='Site accessed over HTTP but HTTPS is available',
|
||||||
|
description=(
|
||||||
|
'The site was accessed over unencrypted HTTP, but HTTPS '
|
||||||
|
'appears to be available. All traffic should use HTTPS.'
|
||||||
|
),
|
||||||
|
tool='tls_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
'Redirect all HTTP traffic to HTTPS using a 301 redirect. '
|
||||||
|
'Implement HSTS to prevent future HTTP access.'
|
||||||
|
)
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='tls',
|
||||||
|
severity='critical',
|
||||||
|
title='Site does not support HTTPS',
|
||||||
|
description=(
|
||||||
|
'The site does not appear to have HTTPS configured. '
|
||||||
|
'All data transmitted is unencrypted and vulnerable to interception.'
|
||||||
|
),
|
||||||
|
tool='tls_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
'Configure TLS/SSL for your server. Obtain a certificate from '
|
||||||
|
"Let's Encrypt (free) or a commercial CA."
|
||||||
|
)
|
||||||
|
))
|
||||||
|
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='tls_enabled',
|
||||||
|
display_name='TLS Enabled',
|
||||||
|
value=0.0,
|
||||||
|
unit='score',
|
||||||
|
source='tls_check'
|
||||||
|
))
|
||||||
|
|
||||||
|
return ScannerResult(
|
||||||
|
scanner_name=self.name,
|
||||||
|
status=ScannerStatus.SUCCESS,
|
||||||
|
issues=issues,
|
||||||
|
metrics=metrics,
|
||||||
|
raw_data=raw_data
|
||||||
|
)
|
||||||
|
|
||||||
|
# For HTTPS URLs, check certificate
|
||||||
|
cert_info = self._get_certificate_info(hostname, port)
|
||||||
|
raw_data['certificate'] = cert_info
|
||||||
|
|
||||||
|
if cert_info.get('error'):
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='tls',
|
||||||
|
severity='critical',
|
||||||
|
title='Certificate validation failed',
|
||||||
|
description=f"SSL certificate error: {cert_info['error']}",
|
||||||
|
tool='tls_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
'Ensure your SSL certificate is valid, not expired, '
|
||||||
|
'and properly configured for your domain.'
|
||||||
|
)
|
||||||
|
))
|
||||||
|
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='certificate_valid',
|
||||||
|
display_name='Certificate Valid',
|
||||||
|
value=0.0,
|
||||||
|
unit='score',
|
||||||
|
source='tls_check'
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
# Certificate is valid
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='certificate_valid',
|
||||||
|
display_name='Certificate Valid',
|
||||||
|
value=1.0,
|
||||||
|
unit='score',
|
||||||
|
source='tls_check'
|
||||||
|
))
|
||||||
|
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='tls_enabled',
|
||||||
|
display_name='TLS Enabled',
|
||||||
|
value=1.0,
|
||||||
|
unit='score',
|
||||||
|
source='tls_check'
|
||||||
|
))
|
||||||
|
|
||||||
|
# Check expiration
|
||||||
|
if cert_info.get('expires'):
|
||||||
|
try:
|
||||||
|
expires = datetime.strptime(
|
||||||
|
cert_info['expires'],
|
||||||
|
'%b %d %H:%M:%S %Y %Z'
|
||||||
|
)
|
||||||
|
expires = expires.replace(tzinfo=timezone.utc)
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
days_until_expiry = (expires - now).days
|
||||||
|
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='certificate_days_until_expiry',
|
||||||
|
display_name='Days Until Certificate Expiry',
|
||||||
|
value=float(days_until_expiry),
|
||||||
|
unit='count',
|
||||||
|
source='tls_check'
|
||||||
|
))
|
||||||
|
|
||||||
|
if days_until_expiry <= 0:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='tls',
|
||||||
|
severity='critical',
|
||||||
|
title='SSL certificate has expired',
|
||||||
|
description=(
|
||||||
|
f"The SSL certificate expired on {cert_info['expires']}. "
|
||||||
|
"Users will see security warnings."
|
||||||
|
),
|
||||||
|
tool='tls_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation='Renew your SSL certificate immediately.'
|
||||||
|
))
|
||||||
|
elif days_until_expiry <= 7:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='tls',
|
||||||
|
severity='high',
|
||||||
|
title='SSL certificate expiring very soon',
|
||||||
|
description=(
|
||||||
|
f"The SSL certificate will expire in {days_until_expiry} days "
|
||||||
|
f"(on {cert_info['expires']}). Renew immediately."
|
||||||
|
),
|
||||||
|
tool='tls_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation='Renew your SSL certificate before it expires.'
|
||||||
|
))
|
||||||
|
elif days_until_expiry <= 30:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='tls',
|
||||||
|
severity='medium',
|
||||||
|
title='SSL certificate expiring soon',
|
||||||
|
description=(
|
||||||
|
f"The SSL certificate will expire in {days_until_expiry} days "
|
||||||
|
f"(on {cert_info['expires']}). Plan for renewal."
|
||||||
|
),
|
||||||
|
tool='tls_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
'Renew your SSL certificate before expiration. '
|
||||||
|
"Consider using auto-renewal with Let's Encrypt."
|
||||||
|
)
|
||||||
|
))
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Could not parse certificate expiry: {e}")
|
||||||
|
|
||||||
|
# Check certificate subject matches hostname
|
||||||
|
if cert_info.get('subject'):
|
||||||
|
subject_cn = dict(x[0] for x in cert_info['subject']).get('commonName', '')
|
||||||
|
san = cert_info.get('subjectAltName', [])
|
||||||
|
san_names = [name for type_, name in san if type_ == 'DNS']
|
||||||
|
|
||||||
|
hostname_matched = self._hostname_matches_cert(
|
||||||
|
hostname, subject_cn, san_names
|
||||||
|
)
|
||||||
|
|
||||||
|
if not hostname_matched:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='tls',
|
||||||
|
severity='high',
|
||||||
|
title='Certificate hostname mismatch',
|
||||||
|
description=(
|
||||||
|
f"The SSL certificate is for '{subject_cn}' but "
|
||||||
|
f"the site is accessed as '{hostname}'."
|
||||||
|
),
|
||||||
|
tool='tls_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
'Obtain a certificate that includes your domain name, '
|
||||||
|
'or add it to the Subject Alternative Names (SAN).'
|
||||||
|
)
|
||||||
|
))
|
||||||
|
|
||||||
|
# Check for HTTP to HTTPS redirect
|
||||||
|
if parsed.scheme == 'https':
|
||||||
|
redirect_info = self._check_http_redirect(hostname)
|
||||||
|
raw_data['http_redirect'] = redirect_info
|
||||||
|
|
||||||
|
if not redirect_info.get('redirects_to_https'):
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='tls',
|
||||||
|
severity='medium',
|
||||||
|
title='No HTTP to HTTPS redirect',
|
||||||
|
description=(
|
||||||
|
'The site does not redirect HTTP requests to HTTPS. '
|
||||||
|
'Users accessing via HTTP will use an insecure connection.'
|
||||||
|
),
|
||||||
|
tool='tls_check',
|
||||||
|
affected_url=f"http://{hostname}",
|
||||||
|
remediation=(
|
||||||
|
'Configure your server to redirect all HTTP (port 80) '
|
||||||
|
'requests to HTTPS (port 443) with a 301 redirect.'
|
||||||
|
)
|
||||||
|
))
|
||||||
|
|
||||||
|
self.logger.info(f"TLS scan complete: {len(issues)} issues")
|
||||||
|
|
||||||
|
return ScannerResult(
|
||||||
|
scanner_name=self.name,
|
||||||
|
status=ScannerStatus.SUCCESS,
|
||||||
|
issues=issues,
|
||||||
|
metrics=metrics,
|
||||||
|
raw_data=raw_data
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
return self._create_error_result(e)
|
||||||
|
|
||||||
|
def _check_https_available(self, hostname: str) -> tuple:
|
||||||
|
"""Check if HTTPS is available for the hostname."""
|
||||||
|
try:
|
||||||
|
context = ssl.create_default_context()
|
||||||
|
with socket.create_connection((hostname, 443), timeout=self.timeout) as sock:
|
||||||
|
with context.wrap_socket(sock, server_hostname=hostname) as ssock:
|
||||||
|
return True, {'available': True, 'protocol': ssock.version()}
|
||||||
|
except ssl.SSLError as e:
|
||||||
|
return True, {'available': True, 'error': str(e)}
|
||||||
|
except Exception as e:
|
||||||
|
return False, {'available': False, 'error': str(e)}
|
||||||
|
|
||||||
|
def _get_certificate_info(self, hostname: str, port: int = 443) -> Dict:
|
||||||
|
"""Get SSL certificate information."""
|
||||||
|
try:
|
||||||
|
context = ssl.create_default_context()
|
||||||
|
|
||||||
|
with socket.create_connection((hostname, port), timeout=self.timeout) as sock:
|
||||||
|
with context.wrap_socket(sock, server_hostname=hostname) as ssock:
|
||||||
|
cert = ssock.getpeercert()
|
||||||
|
|
||||||
|
return {
|
||||||
|
'subject': cert.get('subject'),
|
||||||
|
'issuer': cert.get('issuer'),
|
||||||
|
'version': cert.get('version'),
|
||||||
|
'serialNumber': cert.get('serialNumber'),
|
||||||
|
'notBefore': cert.get('notBefore'),
|
||||||
|
'expires': cert.get('notAfter'),
|
||||||
|
'subjectAltName': cert.get('subjectAltName', []),
|
||||||
|
'protocol': ssock.version(),
|
||||||
|
'cipher': ssock.cipher(),
|
||||||
|
}
|
||||||
|
except ssl.SSLCertVerificationError as e:
|
||||||
|
return {'error': f"Certificate verification failed: {e.verify_message}"}
|
||||||
|
except ssl.SSLError as e:
|
||||||
|
return {'error': f"SSL error: {str(e)}"}
|
||||||
|
except socket.timeout:
|
||||||
|
return {'error': "Connection timed out"}
|
||||||
|
except Exception as e:
|
||||||
|
return {'error': str(e)}
|
||||||
|
|
||||||
|
def _hostname_matches_cert(
|
||||||
|
self,
|
||||||
|
hostname: str,
|
||||||
|
cn: str,
|
||||||
|
san_names: list
|
||||||
|
) -> bool:
|
||||||
|
"""Check if hostname matches certificate CN or SAN."""
|
||||||
|
all_names = [cn] + san_names
|
||||||
|
|
||||||
|
for name in all_names:
|
||||||
|
if name == hostname:
|
||||||
|
return True
|
||||||
|
# Handle wildcard certificates
|
||||||
|
            if name.startswith('*.'):
                domain = name[2:]
                # Ensure the wildcard matches exactly one label:
                # hostname must be "<label>.<domain>" and <label> may not contain a dot
                if hostname.endswith('.' + domain):
                    prefix = hostname[:-len(domain) - 1]
                    if prefix and '.' not in prefix:
                        return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _check_http_redirect(self, hostname: str) -> Dict:
|
||||||
|
"""Check if HTTP redirects to HTTPS."""
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
try:
|
||||||
|
with httpx.Client(
|
||||||
|
timeout=self.timeout,
|
||||||
|
follow_redirects=False
|
||||||
|
) as client:
|
||||||
|
response = client.get(f"http://{hostname}")
|
||||||
|
|
||||||
|
if response.status_code in (301, 302, 303, 307, 308):
|
||||||
|
location = response.headers.get('location', '')
|
||||||
|
redirects_to_https = location.startswith('https://')
|
||||||
|
return {
|
||||||
|
'redirects_to_https': redirects_to_https,
|
||||||
|
'status_code': response.status_code,
|
||||||
|
'location': location,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
'redirects_to_https': False,
|
||||||
|
'status_code': response.status_code,
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
return {
|
||||||
|
'redirects_to_https': False,
|
||||||
|
'error': str(e),
|
||||||
|
}
@ -0,0 +1,307 @@
|
||||||
|
"""
|
||||||
|
OWASP ZAP Scanner Integration.
|
||||||
|
|
||||||
|
This module integrates with OWASP ZAP for security scanning,
|
||||||
|
detecting vulnerabilities like XSS, injection flaws, and
|
||||||
|
misconfigurations.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from .base import (
|
||||||
|
BaseScanner,
|
||||||
|
ScannerResult,
|
||||||
|
ScannerStatus,
|
||||||
|
IssueData,
|
||||||
|
MetricData,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ZAPScanner(BaseScanner):
|
||||||
|
"""
|
||||||
|
Scanner using OWASP ZAP for security vulnerability detection.
|
||||||
|
|
||||||
|
Performs baseline scans to identify common security issues:
|
||||||
|
- XSS vulnerabilities
|
||||||
|
- SQL injection patterns
|
||||||
|
- Insecure cookies
|
||||||
|
- Missing security headers
|
||||||
|
- SSL/TLS issues
|
||||||
|
- And more...
|
||||||
|
"""
|
||||||
|
|
||||||
|
name = "owasp_zap"
|
||||||
|
|
||||||
|
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||||
|
super().__init__(config)
|
||||||
|
scanner_config = settings.SCANNER_CONFIG
|
||||||
|
self.zap_host = self.config.get('zap_host', scanner_config.get('ZAP_HOST', 'http://zap:8080'))
|
||||||
|
self.api_key = self.config.get('api_key', scanner_config.get('ZAP_API_KEY', ''))
|
||||||
|
self.timeout = self.config.get('timeout', scanner_config.get('ZAP_TIMEOUT', 120))
|
||||||
|
|
||||||
|
def is_available(self) -> bool:
|
||||||
|
"""Check if ZAP service is available."""
|
||||||
|
try:
|
||||||
|
with httpx.Client(timeout=10) as client:
|
||||||
|
response = client.get(
|
||||||
|
f"{self.zap_host}/JSON/core/view/version/",
|
||||||
|
params={'apikey': self.api_key}
|
||||||
|
)
|
||||||
|
return response.status_code == 200
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"ZAP service not available: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def run(self, url: str) -> ScannerResult:
|
||||||
|
"""
|
||||||
|
Run ZAP security scan against the URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to scan
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ScannerResult with security findings
|
||||||
|
"""
|
||||||
|
self.logger.info(f"Starting ZAP scan for {url}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Access the target to populate ZAP's site tree
|
||||||
|
self._access_url(url)
|
||||||
|
|
||||||
|
# Spider the site (limited crawl)
|
||||||
|
self._spider_url(url)
|
||||||
|
|
||||||
|
# Run active scan
|
||||||
|
self._active_scan(url)
|
||||||
|
|
||||||
|
# Get alerts
|
||||||
|
alerts = self._get_alerts(url)
|
||||||
|
|
||||||
|
return self._parse_results(url, alerts)
|
||||||
|
|
||||||
|
except httpx.TimeoutException:
|
||||||
|
return self._create_error_result(
|
||||||
|
Exception("ZAP scan timed out")
|
||||||
|
)
|
||||||
|
except httpx.HTTPStatusError as e:
|
||||||
|
return self._create_error_result(
|
||||||
|
Exception(f"ZAP service error: {e.response.status_code}")
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
return self._create_error_result(e)
|
||||||
|
|
||||||
|
def _zap_request(self, endpoint: str, params: Optional[Dict] = None) -> Dict:
|
||||||
|
"""Make a request to the ZAP API."""
|
||||||
|
if params is None:
|
||||||
|
params = {}
|
||||||
|
params['apikey'] = self.api_key
|
||||||
|
|
||||||
|
with httpx.Client(timeout=self.timeout) as client:
|
||||||
|
response = client.get(
|
||||||
|
f"{self.zap_host}{endpoint}",
|
||||||
|
params=params
|
||||||
|
)
|
||||||
|
response.raise_for_status()
|
||||||
|
return response.json()
|
||||||
|
|
||||||
|
def _access_url(self, url: str) -> None:
|
||||||
|
"""Access the URL to add it to ZAP's site tree."""
|
||||||
|
self.logger.debug(f"Accessing URL in ZAP: {url}")
|
||||||
|
self._zap_request(
|
||||||
|
'/JSON/core/action/accessUrl/',
|
||||||
|
{'url': url, 'followRedirects': 'true'}
|
||||||
|
)
|
||||||
|
time.sleep(2) # Wait for ZAP to process
|
||||||
|
|
||||||
|
def _spider_url(self, url: str) -> None:
|
||||||
|
"""Spider the URL to discover pages."""
|
||||||
|
self.logger.debug(f"Spidering URL: {url}")
|
||||||
|
|
||||||
|
# Start spider
|
||||||
|
result = self._zap_request(
|
||||||
|
'/JSON/spider/action/scan/',
|
||||||
|
{
|
||||||
|
'url': url,
|
||||||
|
'maxChildren': '5', # Limited crawl
|
||||||
|
'recurse': 'true',
|
||||||
|
'subtreeOnly': 'true'
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
scan_id = result.get('scan')
|
||||||
|
if not scan_id:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Wait for spider to complete (with timeout)
|
||||||
|
start_time = time.time()
|
||||||
|
while time.time() - start_time < 60: # 60 second spider timeout
|
||||||
|
status = self._zap_request(
|
||||||
|
'/JSON/spider/view/status/',
|
||||||
|
{'scanId': scan_id}
|
||||||
|
)
|
||||||
|
if int(status.get('status', '100')) >= 100:
|
||||||
|
break
|
||||||
|
time.sleep(2)
|
||||||
|
|
||||||
|
def _active_scan(self, url: str) -> None:
|
||||||
|
"""Run active scan against the URL."""
|
||||||
|
self.logger.debug(f"Starting active scan: {url}")
|
||||||
|
|
||||||
|
# Start active scan
|
||||||
|
result = self._zap_request(
|
||||||
|
'/JSON/ascan/action/scan/',
|
||||||
|
{
|
||||||
|
'url': url,
|
||||||
|
'recurse': 'true',
|
||||||
|
'inScopeOnly': 'true'
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
scan_id = result.get('scan')
|
||||||
|
if not scan_id:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Wait for scan to complete (with timeout)
|
||||||
|
start_time = time.time()
|
||||||
|
while time.time() - start_time < self.timeout:
|
||||||
|
status = self._zap_request(
|
||||||
|
'/JSON/ascan/view/status/',
|
||||||
|
{'scanId': scan_id}
|
||||||
|
)
|
||||||
|
if int(status.get('status', '100')) >= 100:
|
||||||
|
break
|
||||||
|
time.sleep(5)
|
||||||
|
|
||||||
|
def _get_alerts(self, url: str) -> List[Dict]:
|
||||||
|
"""Get alerts for the scanned URL."""
|
||||||
|
self.logger.debug(f"Fetching alerts for: {url}")
|
||||||
|
|
||||||
|
result = self._zap_request(
|
||||||
|
'/JSON/core/view/alerts/',
|
||||||
|
{
|
||||||
|
'baseurl': url,
|
||||||
|
'start': '0',
|
||||||
|
'count': '100' # Limit alerts
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return result.get('alerts', [])
|
||||||
|
|
||||||
|
def _parse_results(self, url: str, alerts: List[Dict]) -> ScannerResult:
|
||||||
|
"""
|
||||||
|
Parse ZAP alerts into ScannerResult format.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The scanned URL
|
||||||
|
alerts: List of ZAP alerts
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Parsed ScannerResult
|
||||||
|
"""
|
||||||
|
issues = []
|
||||||
|
metrics = []
|
||||||
|
|
||||||
|
# Count alerts by risk level
|
||||||
|
risk_counts = {
|
||||||
|
'High': 0,
|
||||||
|
'Medium': 0,
|
||||||
|
'Low': 0,
|
||||||
|
'Informational': 0
|
||||||
|
}
|
||||||
|
|
||||||
|
for alert in alerts:
|
||||||
|
risk = alert.get('risk', 'Informational')
|
||||||
|
risk_counts[risk] = risk_counts.get(risk, 0) + 1
|
||||||
|
|
||||||
|
severity = self._map_risk_to_severity(risk)
|
||||||
|
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='security',
|
||||||
|
severity=severity,
|
||||||
|
title=alert.get('name', 'Unknown vulnerability'),
|
||||||
|
description=self._format_description(alert),
|
||||||
|
tool='owasp_zap',
|
||||||
|
affected_url=alert.get('url', url),
|
||||||
|
remediation=alert.get('solution', 'Review and fix the vulnerability.'),
|
||||||
|
raw_data={
|
||||||
|
'alert_ref': alert.get('alertRef'),
|
||||||
|
'cweid': alert.get('cweid'),
|
||||||
|
'wascid': alert.get('wascid'),
|
||||||
|
'confidence': alert.get('confidence'),
|
||||||
|
                    'evidence': (alert.get('evidence') or '')[:500],  # Truncate; tolerate null evidence
|
||||||
|
}
|
||||||
|
))
|
||||||
|
|
||||||
|
# Create metrics for vulnerability counts
|
||||||
|
for risk_level, count in risk_counts.items():
|
||||||
|
if count > 0:
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name=f'zap_{risk_level.lower()}_alerts',
|
||||||
|
display_name=f'{risk_level} Risk Alerts',
|
||||||
|
value=float(count),
|
||||||
|
unit='count',
|
||||||
|
source='owasp_zap'
|
||||||
|
))
|
||||||
|
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='total_security_alerts',
|
||||||
|
display_name='Total Security Alerts',
|
||||||
|
value=float(len(alerts)),
|
||||||
|
unit='count',
|
||||||
|
source='owasp_zap'
|
||||||
|
))
|
||||||
|
|
||||||
|
self.logger.info(
|
||||||
|
f"ZAP scan complete: {len(alerts)} alerts "
|
||||||
|
f"(High: {risk_counts['High']}, Medium: {risk_counts['Medium']}, "
|
||||||
|
f"Low: {risk_counts['Low']})"
|
||||||
|
)
|
||||||
|
|
||||||
|
return ScannerResult(
|
||||||
|
scanner_name=self.name,
|
||||||
|
status=ScannerStatus.SUCCESS,
|
||||||
|
issues=issues,
|
||||||
|
metrics=metrics,
|
||||||
|
raw_data={
|
||||||
|
'total_alerts': len(alerts),
|
||||||
|
'risk_counts': risk_counts,
|
||||||
|
'alerts': alerts[:50] # Store limited raw alerts
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
def _map_risk_to_severity(self, risk: str) -> str:
|
||||||
|
"""Map ZAP risk level to our severity."""
|
||||||
|
mapping = {
|
||||||
|
'High': 'high',
|
||||||
|
'Medium': 'medium',
|
||||||
|
'Low': 'low',
|
||||||
|
'Informational': 'info',
|
||||||
|
}
|
||||||
|
return mapping.get(risk, 'info')
|
||||||
|
|
||||||
|
def _format_description(self, alert: Dict) -> str:
|
||||||
|
"""Format ZAP alert into readable description."""
|
||||||
|
parts = []
|
||||||
|
|
||||||
|
if alert.get('description'):
|
||||||
|
parts.append(alert['description'])
|
||||||
|
|
||||||
|
if alert.get('attack'):
|
||||||
|
parts.append(f"\nAttack: {alert['attack']}")
|
||||||
|
|
||||||
|
if alert.get('evidence'):
|
||||||
|
evidence = alert['evidence'][:200]
|
||||||
|
parts.append(f"\nEvidence: {evidence}")
|
||||||
|
|
||||||
|
if alert.get('reference'):
|
||||||
|
parts.append(f"\nReference: {alert['reference']}")
|
||||||
|
|
||||||
|
return '\n'.join(parts)
@ -0,0 +1,306 @@
|
||||||
|
"""
|
||||||
|
Celery tasks for background scanning.
|
||||||
|
|
||||||
|
This module defines the Celery tasks that orchestrate website scans
|
||||||
|
in the background.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from datetime import timedelta
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from celery import shared_task
|
||||||
|
from celery.exceptions import SoftTimeLimitExceeded
|
||||||
|
from django.conf import settings
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
|
from websites.models import Website, Scan, ScanStatus, Issue, Metric
|
||||||
|
from scanner.scanners import ScanRunner
|
||||||
|
from scanner.utils import validate_url, get_domain_from_url
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@shared_task(
|
||||||
|
bind=True,
|
||||||
|
max_retries=2,
|
||||||
|
default_retry_delay=60,
|
||||||
|
soft_time_limit=300,
|
||||||
|
time_limit=330,
|
||||||
|
)
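# Celery raises SoftTimeLimitExceeded inside the task at soft_time_limit (300s), giving the
# handler below a chance to record a partial result; time_limit (330s) hard-kills the worker
# process 30 seconds later if the task is still running.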
|
||||||
|
def run_scan_task(self, scan_id: str) -> dict:
|
||||||
|
"""
|
||||||
|
Main Celery task for running a website scan.
|
||||||
|
|
||||||
|
This task:
|
||||||
|
1. Updates scan status to running
|
||||||
|
2. Orchestrates all scanners
|
||||||
|
3. Saves results to database
|
||||||
|
4. Handles errors and partial results
|
||||||
|
|
||||||
|
Args:
|
||||||
|
scan_id: UUID of the Scan record
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with scan results summary
|
||||||
|
"""
|
||||||
|
logger.info(f"Starting scan task for scan_id: {scan_id}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Get the scan record
|
||||||
|
scan = Scan.objects.select_related('website').get(id=scan_id)
|
||||||
|
except Scan.DoesNotExist:
|
||||||
|
logger.error(f"Scan {scan_id} not found")
|
||||||
|
return {'error': f'Scan {scan_id} not found'}
|
||||||
|
|
||||||
|
# Update status to running
|
||||||
|
scan.status = ScanStatus.RUNNING
|
||||||
|
scan.started_at = timezone.now()
|
||||||
|
scan.celery_task_id = self.request.id
|
||||||
|
scan.save(update_fields=['status', 'started_at', 'celery_task_id'])
|
||||||
|
|
||||||
|
url = scan.website.url
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Run the scan pipeline
|
||||||
|
runner = ScanRunner()
|
||||||
|
results = runner.run(url)
|
||||||
|
|
||||||
|
# Save results to database
|
||||||
|
_save_scan_results(scan, results)
|
||||||
|
|
||||||
|
# Update website last_scanned_at
|
||||||
|
scan.website.last_scanned_at = timezone.now()
|
||||||
|
scan.website.save(update_fields=['last_scanned_at'])
|
||||||
|
|
||||||
|
logger.info(f"Scan {scan_id} completed successfully")
|
||||||
|
|
||||||
|
return {
|
||||||
|
'scan_id': str(scan_id),
|
||||||
|
'status': scan.status,
|
||||||
|
'overall_score': scan.overall_score,
|
||||||
|
'issues_count': scan.issues.count(),
|
||||||
|
'metrics_count': scan.metrics.count(),
|
||||||
|
}
|
||||||
|
|
||||||
|
except SoftTimeLimitExceeded:
|
||||||
|
logger.warning(f"Scan {scan_id} timed out")
|
||||||
|
scan.status = ScanStatus.PARTIAL
|
||||||
|
scan.error_message = "Scan timed out before completing all checks"
|
||||||
|
scan.completed_at = timezone.now()
|
||||||
|
scan.save(update_fields=['status', 'error_message', 'completed_at'])
|
||||||
|
|
||||||
|
return {
|
||||||
|
'scan_id': str(scan_id),
|
||||||
|
'status': 'partial',
|
||||||
|
'error': 'Scan timed out'
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"Scan {scan_id} failed with error: {e}")
|
||||||
|
scan.status = ScanStatus.FAILED
|
||||||
|
scan.error_message = str(e)
|
||||||
|
scan.completed_at = timezone.now()
|
||||||
|
scan.save(update_fields=['status', 'error_message', 'completed_at'])
|
||||||
|
|
||||||
|
# Retry on certain errors
|
||||||
|
if self.request.retries < self.max_retries:
|
||||||
|
raise self.retry(exc=e)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'scan_id': str(scan_id),
|
||||||
|
'status': 'failed',
|
||||||
|
'error': str(e)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _save_scan_results(scan: Scan, results: dict) -> None:
|
||||||
|
"""
|
||||||
|
Save scan results to the database.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
scan: The Scan model instance
|
||||||
|
results: Aggregated results from ScanRunner
|
||||||
|
"""
|
||||||
|
# Update scan status
|
||||||
|
status_map = {
|
||||||
|
'done': ScanStatus.DONE,
|
||||||
|
'partial': ScanStatus.PARTIAL,
|
||||||
|
'failed': ScanStatus.FAILED,
|
||||||
|
}
|
||||||
|
scan.status = status_map.get(results['status'], ScanStatus.DONE)
|
||||||
|
scan.completed_at = timezone.now()
|
||||||
|
|
||||||
|
# Save scores
|
||||||
|
scores = results.get('scores', {})
|
||||||
|
scan.performance_score = scores.get('performance')
|
||||||
|
scan.accessibility_score = scores.get('accessibility')
|
||||||
|
scan.seo_score = scores.get('seo')
|
||||||
|
scan.best_practices_score = scores.get('best_practices')
|
||||||
|
|
||||||
|
# Save raw data
|
||||||
|
raw_data = results.get('raw_data', {})
|
||||||
|
scan.raw_lighthouse_data = raw_data.get('lighthouse')
|
||||||
|
scan.raw_zap_data = raw_data.get('owasp_zap')
|
||||||
|
scan.raw_playwright_data = raw_data.get('playwright')
|
||||||
|
scan.raw_headers_data = raw_data.get('header_check')
|
||||||
|
|
||||||
|
# Save errors if any
|
||||||
|
if results.get('errors'):
|
||||||
|
scan.error_message = '\n'.join(
|
||||||
|
f"{e['scanner']}: {e['error']}"
|
||||||
|
for e in results['errors']
|
||||||
|
)
|
||||||
|
|
||||||
|
scan.save()
|
||||||
|
|
||||||
|
# Create Issue records
|
||||||
|
issues_to_create = []
|
||||||
|
for issue_data in results.get('issues', []):
|
||||||
|
issues_to_create.append(Issue(
|
||||||
|
scan=scan,
|
||||||
|
category=issue_data['category'],
|
||||||
|
severity=issue_data['severity'],
|
||||||
|
title=issue_data['title'][:500], # Truncate if too long
|
||||||
|
description=issue_data['description'],
|
||||||
|
tool=issue_data['tool'],
|
||||||
|
affected_url=issue_data.get('affected_url'),
|
||||||
|
remediation=issue_data.get('remediation'),
|
||||||
|
raw_data=issue_data.get('raw_data'),
|
||||||
|
))
|
||||||
|
|
||||||
|
if issues_to_create:
|
||||||
|
Issue.objects.bulk_create(issues_to_create)
|
||||||
|
|
||||||
|
# Create Metric records
|
||||||
|
metrics_to_create = []
|
||||||
|
seen_metrics = set() # Track unique metrics
|
||||||
|
|
||||||
|
for metric_data in results.get('metrics', []):
|
||||||
|
metric_key = metric_data['name']
|
||||||
|
if metric_key in seen_metrics:
|
||||||
|
continue # Skip duplicates
|
||||||
|
seen_metrics.add(metric_key)
|
||||||
|
|
||||||
|
# Map unit strings to model choices
|
||||||
|
unit_map = {
|
||||||
|
'ms': 'ms',
|
||||||
|
'milliseconds': 'ms',
|
||||||
|
's': 's',
|
||||||
|
'seconds': 's',
|
||||||
|
'bytes': 'bytes',
|
||||||
|
'kb': 'kb',
|
||||||
|
'kilobytes': 'kb',
|
||||||
|
'mb': 'mb',
|
||||||
|
'megabytes': 'mb',
|
||||||
|
'score': 'score',
|
||||||
|
'percent': 'percent',
|
||||||
|
'count': 'count',
|
||||||
|
}
|
||||||
|
unit = unit_map.get(metric_data['unit'].lower(), 'count')
|
||||||
|
|
||||||
|
metrics_to_create.append(Metric(
|
||||||
|
scan=scan,
|
||||||
|
name=metric_data['name'],
|
||||||
|
display_name=metric_data['display_name'][:200],
|
||||||
|
value=metric_data['value'],
|
||||||
|
unit=unit,
|
||||||
|
source=metric_data['source'],
|
||||||
|
score=metric_data.get('score'),
|
||||||
|
))
|
||||||
|
|
||||||
|
if metrics_to_create:
|
||||||
|
Metric.objects.bulk_create(metrics_to_create)
|
||||||
|
|
||||||
|
# Calculate security score based on issues
|
||||||
|
scan.calculate_security_score()
|
||||||
|
|
||||||
|
# Calculate overall score
|
||||||
|
scan.calculate_overall_score()
|
||||||
|
|
||||||
|
scan.save(update_fields=['security_score', 'overall_score'])
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Saved scan results: {len(issues_to_create)} issues, "
|
||||||
|
f"{len(metrics_to_create)} metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@shared_task
|
||||||
|
def cleanup_old_scans(days: int = 30) -> dict:
|
||||||
|
"""
|
||||||
|
Clean up old scan data to prevent database growth.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
days: Number of days to keep scans
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with cleanup statistics
|
||||||
|
"""
|
||||||
|
cutoff_date = timezone.now() - timedelta(days=days)
|
||||||
|
|
||||||
|
# Delete old scans (cascades to issues and metrics)
|
||||||
|
deleted_count, _ = Scan.objects.filter(
|
||||||
|
created_at__lt=cutoff_date
|
||||||
|
).delete()
|
||||||
|
|
||||||
|
logger.info(f"Cleaned up {deleted_count} old scans")
|
||||||
|
|
||||||
|
return {
|
||||||
|
'deleted_scans': deleted_count,
|
||||||
|
'cutoff_date': cutoff_date.isoformat(),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def check_rate_limit(url: str) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Check if URL scanning is rate limited.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to check
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Error message if rate limited, None otherwise
|
||||||
|
"""
|
||||||
|
from django.core.cache import cache
|
||||||
|
|
||||||
|
scanner_config = settings.SCANNER_CONFIG
|
||||||
|
rate_limit_minutes = scanner_config.get('SCAN_RATE_LIMIT_MINUTES', 5)
|
||||||
|
|
||||||
|
# Create a cache key based on the URL
|
||||||
|
domain = get_domain_from_url(url)
|
||||||
|
cache_key = f"scan_rate_limit:{domain}"
|
||||||
|
|
||||||
|
# Check if already scanned recently
|
||||||
|
last_scan_time = cache.get(cache_key)
|
||||||
|
if last_scan_time:
|
||||||
|
return (
|
||||||
|
f"This URL was scanned recently. "
|
||||||
|
f"Please wait {rate_limit_minutes} minutes between scans."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Set the rate limit
|
||||||
|
cache.set(cache_key, timezone.now().isoformat(), timeout=rate_limit_minutes * 60)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def check_concurrent_scan_limit() -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Check if maximum concurrent scans limit is reached.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Error message if limit reached, None otherwise
|
||||||
|
"""
|
||||||
|
scanner_config = settings.SCANNER_CONFIG
|
||||||
|
max_concurrent = scanner_config.get('MAX_CONCURRENT_SCANS', 3)
|
||||||
|
|
||||||
|
running_count = Scan.objects.filter(status=ScanStatus.RUNNING).count()
|
||||||
|
|
||||||
|
if running_count >= max_concurrent:
|
||||||
|
return (
|
||||||
|
f"Maximum concurrent scans ({max_concurrent}) reached. "
|
||||||
|
"Please wait for current scans to complete."
|
||||||
|
)
|
||||||
|
|
||||||
|
return None
@ -0,0 +1,185 @@
|
||||||
|
"""
|
||||||
|
URL validation and safety utilities.
|
||||||
|
|
||||||
|
This module provides functions for validating and normalizing URLs,
|
||||||
|
including safety checks to prevent SSRF attacks.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import ipaddress
|
||||||
|
import logging
|
||||||
|
import socket
|
||||||
|
from typing import Tuple
|
||||||
|
from urllib.parse import urlparse, urlunparse
|
||||||
|
|
||||||
|
import validators
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def validate_url(url: str) -> Tuple[bool, str]:
|
||||||
|
"""
|
||||||
|
Validate and normalize a URL for scanning.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to validate
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (is_valid, normalized_url_or_error_message)
|
||||||
|
"""
|
||||||
|
if not url:
|
||||||
|
return False, "URL is required"
|
||||||
|
|
||||||
|
# Basic URL validation
|
||||||
|
if not validators.url(url):
|
||||||
|
return False, "Invalid URL format"
|
||||||
|
|
||||||
|
# Parse the URL
|
||||||
|
try:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
except Exception as e:
|
||||||
|
return False, f"Could not parse URL: {e}"
|
||||||
|
|
||||||
|
# Check scheme
|
||||||
|
if parsed.scheme not in ('http', 'https'):
|
||||||
|
return False, "URL must use http or https scheme"
|
||||||
|
|
||||||
|
# Check hostname
|
||||||
|
hostname = parsed.netloc.split(':')[0].lower()
|
||||||
|
|
||||||
|
if not hostname:
|
||||||
|
return False, "URL must have a valid hostname"
|
||||||
|
|
||||||
|
# Safety check: block localhost and private IPs
|
||||||
|
is_safe, safety_error = check_url_safety(hostname)
|
||||||
|
if not is_safe:
|
||||||
|
return False, safety_error
|
||||||
|
|
||||||
|
# Normalize URL
|
||||||
|
normalized = normalize_url(url)
|
||||||
|
|
||||||
|
return True, normalized
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_url(url: str) -> str:
|
||||||
|
"""
|
||||||
|
Normalize a URL to a canonical form.
|
||||||
|
|
||||||
|
- Lowercase hostname
|
||||||
|
- Remove trailing slashes from path
|
||||||
|
- Remove default ports
|
||||||
|
- Sort query parameters
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to normalize
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Normalized URL string
|
||||||
|
"""
|
||||||
|
parsed = urlparse(url)
|
||||||
|
|
||||||
|
# Lowercase hostname
|
||||||
|
hostname = parsed.netloc.lower()
|
||||||
|
|
||||||
|
# Remove default ports
|
||||||
|
    if parsed.scheme == 'http' and hostname.endswith(':80'):
        hostname = hostname[:-len(':80')]
    elif parsed.scheme == 'https' and hostname.endswith(':443'):
        hostname = hostname[:-len(':443')]
|
||||||
|
|
||||||
|
# Normalize path (remove trailing slash except for root)
|
||||||
|
path = parsed.path
|
||||||
|
if path != '/' and path.endswith('/'):
|
||||||
|
path = path.rstrip('/')
|
||||||
|
if not path:
|
||||||
|
path = '/'
|
||||||
|
|
||||||
|
# Reconstruct URL
|
||||||
|
normalized = urlunparse((
|
||||||
|
parsed.scheme,
|
||||||
|
hostname,
|
||||||
|
path,
|
||||||
|
parsed.params,
|
||||||
|
parsed.query,
|
||||||
|
'' # Remove fragment
|
||||||
|
))
|
||||||
|
|
||||||
|
return normalized
|
||||||
|
|
||||||
|
|
||||||
|
def check_url_safety(hostname: str) -> Tuple[bool, str]:
|
||||||
|
"""
|
||||||
|
Check if a hostname is safe to scan (not localhost/private IP).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
hostname: The hostname to check
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (is_safe, error_message_if_not_safe)
|
||||||
|
"""
|
||||||
|
scanner_config = settings.SCANNER_CONFIG
|
||||||
|
blocked_hosts = scanner_config.get('BLOCKED_HOSTS', [])
|
||||||
|
blocked_ranges = scanner_config.get('BLOCKED_IP_RANGES', [])
|
||||||
|
|
||||||
|
# Check blocked hostnames
|
||||||
|
if hostname in blocked_hosts:
|
||||||
|
return False, f"Scanning {hostname} is not allowed"
|
||||||
|
|
||||||
|
# Try to resolve hostname to IP
|
||||||
|
try:
|
||||||
|
ip_addresses = socket.getaddrinfo(
|
||||||
|
hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM
|
||||||
|
)
|
||||||
|
except socket.gaierror:
|
||||||
|
# Could not resolve - might be okay for some hostnames
|
||||||
|
logger.warning(f"Could not resolve hostname: {hostname}")
|
||||||
|
return True, ""
|
||||||
|
|
||||||
|
for family, type_, proto, canonname, sockaddr in ip_addresses:
|
||||||
|
ip_str = sockaddr[0]
|
||||||
|
|
||||||
|
try:
|
||||||
|
ip = ipaddress.ip_address(ip_str)
|
||||||
|
|
||||||
|
# Check if IP is in any blocked range
|
||||||
|
for blocked_range in blocked_ranges:
|
||||||
|
try:
|
||||||
|
network = ipaddress.ip_network(blocked_range, strict=False)
|
||||||
|
if ip in network:
|
||||||
|
return False, f"Scanning private/local IP addresses is not allowed ({ip_str})"
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Additional checks
|
||||||
|
if ip.is_private:
|
||||||
|
return False, f"Scanning private IP addresses is not allowed ({ip_str})"
|
||||||
|
|
||||||
|
if ip.is_loopback:
|
||||||
|
return False, f"Scanning localhost/loopback addresses is not allowed ({ip_str})"
|
||||||
|
|
||||||
|
if ip.is_link_local:
|
||||||
|
return False, f"Scanning link-local addresses is not allowed ({ip_str})"
|
||||||
|
|
||||||
|
if ip.is_reserved:
|
||||||
|
return False, f"Scanning reserved IP addresses is not allowed ({ip_str})"
|
||||||
|
|
||||||
|
except ValueError:
|
||||||
|
# Not a valid IP address format
|
||||||
|
continue
|
||||||
|
|
||||||
|
return True, ""
|
||||||
|
|
||||||
|
|
||||||
|
def get_domain_from_url(url: str) -> str:
|
||||||
|
"""
|
||||||
|
Extract the domain from a URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to extract domain from
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The domain/hostname
|
||||||
|
"""
|
||||||
|
parsed = urlparse(url)
|
||||||
|
return parsed.netloc.split(':')[0].lower()
@ -0,0 +1,89 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>{% block title %}Website Analyzer{% endblock %}</title>
|
||||||
|
|
||||||
|
<!-- Tailwind CSS -->
|
||||||
|
<script src="https://cdn.tailwindcss.com"></script>
|
||||||
|
|
||||||
|
<!-- Alpine.js for interactivity -->
|
||||||
|
<script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
|
||||||
|
|
||||||
|
<!-- Chart.js for visualizations -->
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
[x-cloak] { display: none !important; }
|
||||||
|
|
||||||
|
/* Custom animations */
|
||||||
|
@keyframes pulse-slow {
|
||||||
|
0%, 100% { opacity: 1; }
|
||||||
|
50% { opacity: 0.5; }
|
||||||
|
}
|
||||||
|
.animate-pulse-slow {
|
||||||
|
animation: pulse-slow 2s cubic-bezier(0.4, 0, 0.6, 1) infinite;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Score circle gradient */
|
||||||
|
.score-circle {
|
||||||
|
background: conic-gradient(
|
||||||
|
var(--score-color) calc(var(--score) * 3.6deg),
|
||||||
|
#e5e7eb calc(var(--score) * 3.6deg)
|
||||||
|
);
|
||||||
|
}
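        /* Illustrative markup (rendered by the page templates, not defined here):
           <div class="score-circle" style="--score: 85; --score-color: #16a34a">...</div> */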
|
||||||
|
</style>
|
||||||
|
|
||||||
|
{% block extra_head %}{% endblock %}
|
||||||
|
</head>
|
||||||
|
<body class="bg-gray-50 min-h-screen">
|
||||||
|
<!-- Navigation -->
|
||||||
|
<nav class="bg-white shadow-sm border-b border-gray-200">
|
||||||
|
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
|
||||||
|
<div class="flex justify-between h-16">
|
||||||
|
<div class="flex items-center">
|
||||||
|
<a href="/" class="flex items-center space-x-2">
|
||||||
|
<svg class="w-8 h-8 text-blue-600" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
|
||||||
|
d="M9 12l2 2 4-4m5.618-4.016A11.955 11.955 0 0112 2.944a11.955 11.955 0 01-8.618 3.04A12.02 12.02 0 003 9c0 5.591 3.824 10.29 9 11.622 5.176-1.332 9-6.03 9-11.622 0-1.042-.133-2.052-.382-3.016z"/>
|
||||||
|
</svg>
|
||||||
|
<span class="font-bold text-xl text-gray-900">Website Analyzer</span>
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
<div class="flex items-center space-x-4">
|
||||||
|
<a href="/" class="text-gray-600 hover:text-gray-900 px-3 py-2 rounded-md text-sm font-medium">
|
||||||
|
New Scan
|
||||||
|
</a>
|
||||||
|
<a href="/api/" class="text-gray-600 hover:text-gray-900 px-3 py-2 rounded-md text-sm font-medium">
|
||||||
|
API
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
<!-- Main Content -->
|
||||||
|
<main class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8">
|
||||||
|
{% block content %}{% endblock %}
|
||||||
|
</main>
|
||||||
|
|
||||||
|
<!-- Footer -->
|
||||||
|
<footer class="bg-white border-t border-gray-200 mt-auto">
|
||||||
|
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-6">
|
||||||
|
<div class="flex justify-between items-center">
|
||||||
|
<p class="text-gray-500 text-sm">
|
||||||
|
Website Analyzer - Security & Performance Scanner
|
||||||
|
</p>
|
||||||
|
<div class="flex space-x-4">
|
||||||
|
<a href="/api/health/" class="text-gray-400 hover:text-gray-600 text-sm">
|
||||||
|
Health Check
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</footer>
|
||||||
|
|
||||||
|
{% block extra_js %}{% endblock %}
|
||||||
|
</body>
|
||||||
|
</html>
@ -0,0 +1,5 @@
|
||||||
|
"""
|
||||||
|
Websites app initialization.
|
||||||
|
"""
|
||||||
|
|
||||||
|
default_app_config = 'websites.apps.WebsitesConfig'
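# Note: default_app_config is deprecated since Django 3.2 and ignored on 4.1+;
# WebsitesConfig is discovered automatically from websites/apps.py on modern versions.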
@ -0,0 +1,93 @@
|
||||||
|
"""
|
||||||
|
Django admin configuration for Website Analyzer models.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from django.contrib import admin
|
||||||
|
from .models import Website, Scan, Issue, Metric
|
||||||
|
|
||||||
|
|
||||||
|
@admin.register(Website)
|
||||||
|
class WebsiteAdmin(admin.ModelAdmin):
|
||||||
|
list_display = ('url', 'domain', 'created_at', 'last_scanned_at')
|
||||||
|
list_filter = ('created_at', 'last_scanned_at')
|
||||||
|
search_fields = ('url', 'domain')
|
||||||
|
readonly_fields = ('id', 'created_at', 'domain')
|
||||||
|
ordering = ('-created_at',)
|
||||||
|
|
||||||
|
|
||||||
|
class IssueInline(admin.TabularInline):
|
||||||
|
model = Issue
|
||||||
|
extra = 0
|
||||||
|
readonly_fields = ('id', 'category', 'severity', 'tool', 'title', 'created_at')
|
||||||
|
can_delete = False
|
||||||
|
show_change_link = True
|
||||||
|
max_num = 10
|
||||||
|
|
||||||
|
|
||||||
|
class MetricInline(admin.TabularInline):
|
||||||
|
model = Metric
|
||||||
|
extra = 0
|
||||||
|
readonly_fields = ('id', 'name', 'display_name', 'value', 'unit', 'source', 'score')
|
||||||
|
can_delete = False
|
||||||
|
max_num = 15
|
||||||
|
|
||||||
|
|
||||||
|
@admin.register(Scan)
|
||||||
|
class ScanAdmin(admin.ModelAdmin):
|
||||||
|
list_display = (
|
||||||
|
'id', 'website', 'status', 'overall_score',
|
||||||
|
'performance_score', 'security_score', 'created_at'
|
||||||
|
)
|
||||||
|
list_filter = ('status', 'created_at')
|
||||||
|
search_fields = ('website__url', 'website__domain')
|
||||||
|
readonly_fields = (
|
||||||
|
'id', 'created_at', 'started_at', 'completed_at',
|
||||||
|
'celery_task_id', 'raw_lighthouse_data', 'raw_zap_data',
|
||||||
|
'raw_playwright_data', 'raw_headers_data'
|
||||||
|
)
|
||||||
|
inlines = [IssueInline, MetricInline]
|
||||||
|
ordering = ('-created_at',)
|
||||||
|
|
||||||
|
fieldsets = (
|
||||||
|
('Basic Info', {
|
||||||
|
'fields': ('id', 'website', 'status', 'celery_task_id')
|
||||||
|
}),
|
||||||
|
('Timestamps', {
|
||||||
|
'fields': ('created_at', 'started_at', 'completed_at')
|
||||||
|
}),
|
||||||
|
('Scores', {
|
||||||
|
'fields': (
|
||||||
|
'overall_score', 'performance_score', 'accessibility_score',
|
||||||
|
'seo_score', 'best_practices_score', 'security_score'
|
||||||
|
)
|
||||||
|
}),
|
||||||
|
('Errors', {
|
||||||
|
'fields': ('error_message',),
|
||||||
|
'classes': ('collapse',)
|
||||||
|
}),
|
||||||
|
('Raw Data', {
|
||||||
|
'fields': (
|
||||||
|
'raw_lighthouse_data', 'raw_zap_data',
|
||||||
|
'raw_playwright_data', 'raw_headers_data'
|
||||||
|
),
|
||||||
|
'classes': ('collapse',)
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@admin.register(Issue)
|
||||||
|
class IssueAdmin(admin.ModelAdmin):
|
||||||
|
list_display = ('title', 'scan', 'category', 'severity', 'tool', 'created_at')
|
||||||
|
list_filter = ('category', 'severity', 'tool', 'created_at')
|
||||||
|
search_fields = ('title', 'description', 'scan__website__url')
|
||||||
|
readonly_fields = ('id', 'created_at', 'raw_data')
|
||||||
|
ordering = ('severity', '-created_at')
|
||||||
|
|
||||||
|
|
||||||
|
@admin.register(Metric)
|
||||||
|
class MetricAdmin(admin.ModelAdmin):
|
||||||
|
list_display = ('display_name', 'scan', 'value', 'unit', 'source', 'score')
|
||||||
|
list_filter = ('source', 'unit')
|
||||||
|
search_fields = ('name', 'display_name', 'scan__website__url')
|
||||||
|
readonly_fields = ('id', 'created_at')
|
||||||
|
ordering = ('name',)
@ -0,0 +1,11 @@
|
||||||
|
"""
|
||||||
|
Websites app configuration.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from django.apps import AppConfig
|
||||||
|
|
||||||
|
|
||||||
|
class WebsitesConfig(AppConfig):
|
||||||
|
default_auto_field = 'django.db.models.BigAutoField'
|
||||||
|
name = 'websites'
|
||||||
|
verbose_name = 'Website Scanner'
@ -0,0 +1,493 @@
|
||||||
|
"""
|
||||||
|
Database models for Website Analyzer.
|
||||||
|
|
||||||
|
This module defines the core data models for storing websites, scans,
|
||||||
|
issues, and metrics from various scanning tools.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
from django.db import models
|
||||||
|
from django.utils import timezone
|
||||||
|
from django.core.validators import URLValidator
|
||||||
|
|
||||||
|
|
||||||
|
class Website(models.Model):
|
||||||
|
"""
|
||||||
|
Represents a website that has been scanned.
|
||||||
|
|
||||||
|
Each unique URL gets one Website record, which can have multiple
|
||||||
|
Scan records associated with it.
|
||||||
|
"""
|
||||||
|
|
||||||
|
id = models.UUIDField(
|
||||||
|
primary_key=True,
|
||||||
|
default=uuid.uuid4,
|
||||||
|
editable=False,
|
||||||
|
help_text="Unique identifier for the website"
|
||||||
|
)
|
||||||
|
url = models.URLField(
|
||||||
|
max_length=2048,
|
||||||
|
unique=True,
|
||||||
|
validators=[URLValidator(schemes=['http', 'https'])],
|
||||||
|
help_text="The normalized URL of the website"
|
||||||
|
)
|
||||||
|
domain = models.CharField(
|
||||||
|
max_length=255,
|
||||||
|
db_index=True,
|
||||||
|
help_text="The domain extracted from the URL"
|
||||||
|
)
|
||||||
|
created_at = models.DateTimeField(
|
||||||
|
auto_now_add=True,
|
||||||
|
help_text="When the website was first added"
|
||||||
|
)
|
||||||
|
last_scanned_at = models.DateTimeField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="When the website was last scanned"
|
||||||
|
)
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
db_table = 'websites'
|
||||||
|
ordering = ['-created_at']
|
||||||
|
indexes = [
|
||||||
|
models.Index(fields=['domain']),
|
||||||
|
models.Index(fields=['-last_scanned_at']),
|
||||||
|
]
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return self.url
|
||||||
|
|
||||||
|
def save(self, *args, **kwargs):
|
||||||
|
"""Extract domain from URL before saving."""
|
||||||
|
if self.url:
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
parsed = urlparse(self.url)
|
||||||
|
self.domain = parsed.netloc.lower()
|
||||||
|
super().save(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class ScanStatus(models.TextChoices):
|
||||||
|
"""Enumeration of possible scan statuses."""
|
||||||
|
PENDING = 'pending', 'Pending'
|
||||||
|
RUNNING = 'running', 'Running'
|
||||||
|
DONE = 'done', 'Completed'
|
||||||
|
FAILED = 'failed', 'Failed'
|
||||||
|
PARTIAL = 'partial', 'Partially Completed'
|
||||||
|
|
||||||
|
|
||||||
|
class Scan(models.Model):
|
||||||
|
"""
|
||||||
|
Represents a single scan of a website.
|
||||||
|
|
||||||
|
Contains aggregated scores from various scanning tools and
|
||||||
|
links to detailed issues and metrics.
|
||||||
|
"""
|
||||||
|
|
||||||
|
id = models.UUIDField(
|
||||||
|
primary_key=True,
|
||||||
|
default=uuid.uuid4,
|
||||||
|
editable=False,
|
||||||
|
help_text="Unique identifier for the scan"
|
||||||
|
)
|
||||||
|
website = models.ForeignKey(
|
||||||
|
Website,
|
||||||
|
on_delete=models.CASCADE,
|
||||||
|
related_name='scans',
|
||||||
|
help_text="The website that was scanned"
|
||||||
|
)
|
||||||
|
status = models.CharField(
|
||||||
|
max_length=20,
|
||||||
|
choices=ScanStatus.choices,
|
||||||
|
default=ScanStatus.PENDING,
|
||||||
|
db_index=True,
|
||||||
|
help_text="Current status of the scan"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Celery task tracking
|
||||||
|
celery_task_id = models.CharField(
|
||||||
|
max_length=255,
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Celery task ID for tracking"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Timestamps
|
||||||
|
created_at = models.DateTimeField(
|
||||||
|
auto_now_add=True,
|
||||||
|
help_text="When the scan was created"
|
||||||
|
)
|
||||||
|
started_at = models.DateTimeField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="When the scan started running"
|
||||||
|
)
|
||||||
|
completed_at = models.DateTimeField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="When the scan completed"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Aggregated scores (0-100)
|
||||||
|
performance_score = models.IntegerField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Lighthouse performance score (0-100)"
|
||||||
|
)
|
||||||
|
accessibility_score = models.IntegerField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Lighthouse accessibility score (0-100)"
|
||||||
|
)
|
||||||
|
seo_score = models.IntegerField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Lighthouse SEO score (0-100)"
|
||||||
|
)
|
||||||
|
best_practices_score = models.IntegerField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Lighthouse best practices score (0-100)"
|
||||||
|
)
|
||||||
|
security_score = models.IntegerField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Computed security score based on issues (0-100)"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Overall health score (computed average)
|
||||||
|
overall_score = models.IntegerField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Overall health score (0-100)"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Error tracking
|
||||||
|
error_message = models.TextField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Error message if scan failed"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Raw data from scanners
|
||||||
|
raw_lighthouse_data = models.JSONField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Raw Lighthouse report data"
|
||||||
|
)
|
||||||
|
raw_zap_data = models.JSONField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Raw OWASP ZAP report data"
|
||||||
|
)
|
||||||
|
raw_playwright_data = models.JSONField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Raw Playwright analysis data"
|
||||||
|
)
|
||||||
|
raw_headers_data = models.JSONField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Raw HTTP headers analysis data"
|
||||||
|
)
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
db_table = 'scans'
|
||||||
|
ordering = ['-created_at']
|
||||||
|
indexes = [
|
||||||
|
models.Index(fields=['status']),
|
||||||
|
models.Index(fields=['-created_at']),
|
||||||
|
models.Index(fields=['website', '-created_at']),
|
||||||
|
]
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return f"Scan {self.id} - {self.website.url} ({self.status})"
|
||||||
|
|
||||||
|
def calculate_overall_score(self):
|
||||||
|
"""
|
||||||
|
Calculate overall health score as weighted average of all scores.
|
||||||
|
|
||||||
|
Weights:
|
||||||
|
- Performance: 25%
|
||||||
|
- Security: 30%
|
||||||
|
- Accessibility: 15%
|
||||||
|
- SEO: 15%
|
||||||
|
- Best Practices: 15%
|
||||||
|
"""
|
||||||
|
scores = [
|
||||||
|
(self.performance_score, 0.25),
|
||||||
|
(self.security_score, 0.30),
|
||||||
|
(self.accessibility_score, 0.15),
|
||||||
|
(self.seo_score, 0.15),
|
||||||
|
(self.best_practices_score, 0.15),
|
||||||
|
]
|
||||||
|
|
||||||
|
total_weight = 0
|
||||||
|
weighted_sum = 0
|
||||||
|
|
||||||
|
for score, weight in scores:
|
||||||
|
if score is not None:
|
||||||
|
weighted_sum += score * weight
|
||||||
|
total_weight += weight
|
||||||
|
|
||||||
|
if total_weight > 0:
|
||||||
|
self.overall_score = round(weighted_sum / total_weight)
|
||||||
|
else:
|
||||||
|
self.overall_score = None
|
||||||
|
|
||||||
|
return self.overall_score
|
||||||
|
|
||||||
|
def calculate_security_score(self):
|
||||||
|
"""
|
||||||
|
Calculate security score based on security issues found.
|
||||||
|
|
||||||
|
Starts at 100 and deducts points based on issue severity:
|
||||||
|
- Critical: -25 points each
|
||||||
|
- High: -15 points each
|
||||||
|
- Medium: -8 points each
|
||||||
|
- Low: -3 points each
|
||||||
|
- Info: -1 point each
|
||||||
|
"""
|
||||||
|
deductions = {
|
||||||
|
'critical': 25,
|
||||||
|
'high': 15,
|
||||||
|
'medium': 8,
|
||||||
|
'low': 3,
|
||||||
|
'info': 1,
|
||||||
|
}
|
||||||
|
|
||||||
|
score = 100
|
||||||
|
security_issues = self.issues.filter(
|
||||||
|
category__in=['security', 'headers', 'tls', 'cors']
|
||||||
|
)
|
||||||
|
|
||||||
|
for issue in security_issues:
|
||||||
|
score -= deductions.get(issue.severity, 0)
|
||||||
|
|
||||||
|
self.security_score = max(0, score)
|
||||||
|
return self.security_score
|
||||||
|
|
||||||
|
|
||||||
|
class IssueCategory(models.TextChoices):
|
||||||
|
"""Categories of issues that can be detected."""
|
||||||
|
PERFORMANCE = 'performance', 'Performance'
|
||||||
|
SECURITY = 'security', 'Security'
|
||||||
|
HEADERS = 'headers', 'HTTP Headers'
|
||||||
|
TLS = 'tls', 'TLS/SSL'
|
||||||
|
CORS = 'cors', 'CORS'
|
||||||
|
ACCESSIBILITY = 'accessibility', 'Accessibility'
|
||||||
|
SEO = 'seo', 'SEO'
|
||||||
|
BEST_PRACTICES = 'best_practices', 'Best Practices'
|
||||||
|
CONTENT = 'content', 'Content'
|
||||||
|
RESOURCES = 'resources', 'Resources'
|
||||||
|
|
||||||
|
|
||||||
|
class IssueSeverity(models.TextChoices):
|
||||||
|
"""Severity levels for issues."""
|
||||||
|
CRITICAL = 'critical', 'Critical'
|
||||||
|
HIGH = 'high', 'High'
|
||||||
|
MEDIUM = 'medium', 'Medium'
|
||||||
|
LOW = 'low', 'Low'
|
||||||
|
INFO = 'info', 'Informational'
|
||||||
|
|
||||||
|
|
||||||
|
class ScannerTool(models.TextChoices):
|
||||||
|
"""Scanner tools that can detect issues."""
|
||||||
|
LIGHTHOUSE = 'lighthouse', 'Google Lighthouse'
|
||||||
|
ZAP = 'owasp_zap', 'OWASP ZAP'
|
||||||
|
PLAYWRIGHT = 'playwright', 'Playwright'
|
||||||
|
HEADER_CHECK = 'header_check', 'HTTP Header Check'
|
||||||
|
TLS_CHECK = 'tls_check', 'TLS/SSL Check'
|
||||||
|
|
||||||
|
|
||||||
|
class Issue(models.Model):
|
||||||
|
"""
|
||||||
|
Represents a specific issue found during a scan.
|
||||||
|
|
||||||
|
Issues are categorized by type, severity, and the tool that detected them.
|
||||||
|
Each issue includes a description and suggested remediation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
id = models.UUIDField(
|
||||||
|
primary_key=True,
|
||||||
|
default=uuid.uuid4,
|
||||||
|
editable=False
|
||||||
|
)
|
||||||
|
scan = models.ForeignKey(
|
||||||
|
Scan,
|
||||||
|
on_delete=models.CASCADE,
|
||||||
|
related_name='issues',
|
||||||
|
help_text="The scan that found this issue"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Classification
|
||||||
|
category = models.CharField(
|
||||||
|
max_length=30,
|
||||||
|
choices=IssueCategory.choices,
|
||||||
|
db_index=True,
|
||||||
|
help_text="Category of the issue"
|
||||||
|
)
|
||||||
|
severity = models.CharField(
|
||||||
|
max_length=20,
|
||||||
|
choices=IssueSeverity.choices,
|
||||||
|
db_index=True,
|
||||||
|
help_text="Severity level of the issue"
|
||||||
|
)
|
||||||
|
tool = models.CharField(
|
||||||
|
max_length=30,
|
||||||
|
choices=ScannerTool.choices,
|
||||||
|
help_text="Tool that detected this issue"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Issue details
|
||||||
|
title = models.CharField(
|
||||||
|
max_length=500,
|
||||||
|
help_text="Brief title of the issue"
|
||||||
|
)
|
||||||
|
description = models.TextField(
|
||||||
|
help_text="Detailed description of the issue"
|
||||||
|
)
|
||||||
|
affected_url = models.URLField(
|
||||||
|
max_length=2048,
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Specific URL affected by this issue"
|
||||||
|
)
|
||||||
|
remediation = models.TextField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Suggested fix or remediation"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Additional data from scanner
|
||||||
|
raw_data = models.JSONField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Raw data from the scanner for this issue"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Timestamps
|
||||||
|
created_at = models.DateTimeField(
|
||||||
|
auto_now_add=True
|
||||||
|
)
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
db_table = 'issues'
|
||||||
|
ordering = ['severity', '-created_at']
|
||||||
|
indexes = [
|
||||||
|
models.Index(fields=['scan', 'category']),
|
||||||
|
models.Index(fields=['scan', 'severity']),
|
||||||
|
models.Index(fields=['tool']),
|
||||||
|
]
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return f"[{self.severity}] {self.title}"
|
||||||
|
|
||||||
|
|
||||||
|
class MetricUnit(models.TextChoices):
|
||||||
|
"""Units of measurement for metrics."""
|
||||||
|
MILLISECONDS = 'ms', 'Milliseconds'
|
||||||
|
SECONDS = 's', 'Seconds'
|
||||||
|
BYTES = 'bytes', 'Bytes'
|
||||||
|
KILOBYTES = 'kb', 'Kilobytes'
|
||||||
|
MEGABYTES = 'mb', 'Megabytes'
|
||||||
|
SCORE = 'score', 'Score (0-1)'
|
||||||
|
PERCENT = 'percent', 'Percentage'
|
||||||
|
COUNT = 'count', 'Count'
|
||||||
|
|
||||||
|
|
||||||
|
class Metric(models.Model):
|
||||||
|
"""
|
||||||
|
Represents a specific metric measured during a scan.
|
||||||
|
|
||||||
|
Metrics are numerical values with units, such as page load time,
|
||||||
|
total byte weight, number of requests, etc.
|
||||||
|
"""
|
||||||
|
|
||||||
|
id = models.UUIDField(
|
||||||
|
primary_key=True,
|
||||||
|
default=uuid.uuid4,
|
||||||
|
editable=False
|
||||||
|
)
|
||||||
|
scan = models.ForeignKey(
|
||||||
|
Scan,
|
||||||
|
on_delete=models.CASCADE,
|
||||||
|
related_name='metrics',
|
||||||
|
help_text="The scan that measured this metric"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Metric identification
|
||||||
|
name = models.CharField(
|
||||||
|
max_length=100,
|
||||||
|
db_index=True,
|
||||||
|
help_text="Name of the metric (e.g., 'first_contentful_paint_ms')"
|
||||||
|
)
|
||||||
|
display_name = models.CharField(
|
||||||
|
max_length=200,
|
||||||
|
help_text="Human-readable name for display"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Value
|
||||||
|
value = models.FloatField(
|
||||||
|
help_text="Numeric value of the metric"
|
||||||
|
)
|
||||||
|
unit = models.CharField(
|
||||||
|
max_length=20,
|
||||||
|
choices=MetricUnit.choices,
|
||||||
|
help_text="Unit of measurement"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Source
|
||||||
|
source = models.CharField(
|
||||||
|
max_length=30,
|
||||||
|
choices=ScannerTool.choices,
|
||||||
|
help_text="Tool that provided this metric"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Score (if applicable)
|
||||||
|
score = models.FloatField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Lighthouse score for this metric (0-1)"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Timestamp
|
||||||
|
created_at = models.DateTimeField(
|
||||||
|
auto_now_add=True
|
||||||
|
)
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
db_table = 'metrics'
|
||||||
|
ordering = ['name']
|
||||||
|
indexes = [
|
||||||
|
models.Index(fields=['scan', 'name']),
|
||||||
|
models.Index(fields=['source']),
|
||||||
|
]
|
||||||
|
# Ensure unique metric names per scan
|
||||||
|
constraints = [
|
||||||
|
models.UniqueConstraint(
|
||||||
|
fields=['scan', 'name'],
|
||||||
|
name='unique_metric_per_scan'
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return f"{self.display_name}: {self.value} {self.unit}"
|
||||||
|
|
||||||
|
def get_formatted_value(self):
|
||||||
|
"""Return a formatted string representation of the value."""
|
||||||
|
if self.unit == MetricUnit.MILLISECONDS:
|
||||||
|
if self.value >= 1000:
|
||||||
|
return f"{self.value / 1000:.2f}s"
|
||||||
|
return f"{self.value:.0f}ms"
|
||||||
|
elif self.unit == MetricUnit.BYTES:
|
||||||
|
if self.value >= 1024 * 1024:
|
||||||
|
return f"{self.value / (1024 * 1024):.2f} MB"
|
||||||
|
elif self.value >= 1024:
|
||||||
|
return f"{self.value / 1024:.1f} KB"
|
||||||
|
return f"{self.value:.0f} bytes"
|
||||||
|
elif self.unit == MetricUnit.PERCENT:
|
||||||
|
return f"{self.value:.1f}%"
|
||||||
|
elif self.unit == MetricUnit.SCORE:
|
||||||
|
return f"{self.value:.3f}"
|
||||||
|
else:
|
||||||
|
return f"{self.value:.2f} {self.get_unit_display()}"
@ -0,0 +1,160 @@
# Website Analyzer - Docker Compose Configuration
# This file orchestrates all services required for the application

version: '3.9'

services:
  # ==========================================================================
  # PostgreSQL Database
  # ==========================================================================
  db:
    image: postgres:16-alpine
    container_name: analyzer_db
    restart: unless-stopped
    environment:
      POSTGRES_USER: analyzer
      POSTGRES_PASSWORD: analyzer_password
      POSTGRES_DB: website_analyzer
    volumes:
      - postgres_data:/var/lib/postgresql/data
    ports:
      - "5432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U analyzer -d website_analyzer"]
      interval: 10s
      timeout: 5s
      retries: 5

  # ==========================================================================
  # Redis - Message Broker & Cache
  # ==========================================================================
  redis:
    image: redis:7-alpine
    container_name: analyzer_redis
    restart: unless-stopped
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5

  # ==========================================================================
  # Django Web Application
  # ==========================================================================
  web:
    build:
      context: ./backend
      dockerfile: Dockerfile
    container_name: analyzer_web
    restart: unless-stopped
    command: >
      sh -c "python manage.py migrate &&
             python manage.py collectstatic --noinput &&
             gunicorn core.wsgi:application --bind 0.0.0.0:8000 --workers 4 --threads 2"
    volumes:
      - ./backend:/app
      - static_volume:/app/staticfiles
    ports:
      - "8000:8000"
    env_file:
      - ./backend/.env
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_healthy
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/api/health/"]
      interval: 30s
      timeout: 10s
      retries: 3
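
  # Note: the ./backend bind mount layers the local source tree over /app, so
  # code edits show up in the running container without a rebuild (handy in
  # development; a production setup would usually drop the bind mount and run
  # the code baked into the image). The command chain also means the container
  # only starts serving, and therefore only passes its healthcheck, once
  # migrate and collectstatic have finished.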

  # ==========================================================================
  # Celery Worker - Background Task Processing
  # ==========================================================================
  celery_worker:
    build:
      context: ./backend
      dockerfile: Dockerfile
    container_name: analyzer_celery_worker
    restart: unless-stopped
    command: celery -A core worker -l INFO --concurrency=2
    volumes:
      - ./backend:/app
    env_file:
      - ./backend/.env
    depends_on:
      - db
      - redis
      - web

  # ==========================================================================
  # Celery Beat - Scheduled Tasks (Optional)
  # ==========================================================================
  celery_beat:
    build:
      context: ./backend
      dockerfile: Dockerfile
    container_name: analyzer_celery_beat
    restart: unless-stopped
    command: celery -A core beat -l INFO
    volumes:
      - ./backend:/app
    env_file:
      - ./backend/.env
    depends_on:
      - db
      - redis
      - celery_worker

  # ==========================================================================
  # OWASP ZAP - Security Scanner
  # ==========================================================================
  zap:
    image: ghcr.io/zaproxy/zaproxy:stable
    container_name: analyzer_zap
    restart: unless-stopped
    command: zap.sh -daemon -host 0.0.0.0 -port 8080 -config api.key=zap-api-key-change-me -config api.addrs.addr.name=.* -config api.addrs.addr.regex=true
    ports:
      - "8081:8080"
    volumes:
      - zap_data:/home/zap/.ZAP
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/JSON/core/view/version/?apikey=zap-api-key-change-me"]
      interval: 30s
      timeout: 10s
      retries: 5
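
  # Note: the ZAP API key appears twice above (in the daemon command and in the
  # healthcheck URL); if it is rotated, both occurrences, plus whatever value
  # the Django side uses when calling the ZAP API, have to change together, so
  # sourcing it from an environment variable is the safer long-term setup.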

  # ==========================================================================
  # Lighthouse Scanner Service (Node.js)
  # ==========================================================================
  lighthouse:
    build:
      context: ./lighthouse
      dockerfile: Dockerfile
    container_name: analyzer_lighthouse
    restart: unless-stopped
    ports:
      - "3001:3001"
    volumes:
      - lighthouse_reports:/app/reports
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3001/health"]
      interval: 30s
      timeout: 10s
      retries: 3

volumes:
  postgres_data:
  redis_data:
  static_volume:
  zap_data:
  lighthouse_reports:

networks:
  default:
    name: analyzer_network
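
# A typical way to bring the whole stack up (assuming this file sits at the
# repository root next to the backend/ and lighthouse/ directories it
# references):
#
#   docker compose up -d --build
#   docker compose ps          # wait until db, redis, web, zap and lighthouse report "healthy"
#   docker compose logs -f web # follow the Django container's output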
@ -0,0 +1,54 @@
# Lighthouse Scanner Service - Dockerfile
# Node.js service that runs Lighthouse CLI and provides HTTP API

FROM node:20-slim

# Install Chrome dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    chromium \
    fonts-liberation \
    libappindicator3-1 \
    libasound2 \
    libatk-bridge2.0-0 \
    libatk1.0-0 \
    libcups2 \
    libdbus-1-3 \
    libdrm2 \
    libgbm1 \
    libgtk-3-0 \
    libnspr4 \
    libnss3 \
    libxcomposite1 \
    libxdamage1 \
    libxfixes3 \
    libxkbcommon0 \
    libxrandr2 \
    xdg-utils \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Set Chrome path for Lighthouse
ENV CHROME_PATH=/usr/bin/chromium

WORKDIR /app

# Copy package files
COPY package*.json ./

# Install dependencies
RUN npm ci --only=production

# Copy application code
COPY . .

# Create reports directory
RUN mkdir -p reports

# Create non-root user
RUN useradd -m -u 1000 lighthouse && \
    chown -R lighthouse:lighthouse /app
USER lighthouse

EXPOSE 3001

CMD ["node", "server.js"]
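
# Building and running this image on its own, outside docker-compose, might
# look like the following (the image tag is arbitrary):
#
#   docker build -t lighthouse-scanner ./lighthouse
#   docker run --rm -p 3001:3001 lighthouse-scanner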
@ -0,0 +1,19 @@
{
  "name": "lighthouse-scanner",
  "version": "1.0.0",
  "description": "Lighthouse scanner service for Website Analyzer",
  "main": "server.js",
  "scripts": {
    "start": "node server.js",
    "dev": "node --watch server.js"
  },
  "dependencies": {
    "express": "^4.18.2",
    "lighthouse": "^11.4.0",
    "chrome-launcher": "^1.1.0",
    "uuid": "^9.0.0"
  },
  "engines": {
    "node": ">=18.0.0"
  }
}
@ -0,0 +1,328 @@
/**
 * Lighthouse Scanner Service
 *
 * This service provides an HTTP API for running Lighthouse audits.
 * It's designed to be called from the Django backend via Celery tasks.
 */

const express = require('express');
const { v4: uuidv4 } = require('uuid');
const fs = require('fs').promises;
const path = require('path');

// lighthouse v10+ is published as an ES module (and recent chrome-launcher
// releases are as well), so neither can be pulled in with require() from this
// CommonJS file; both are loaded with a dynamic import() inside the /scan handler.

const app = express();
app.use(express.json());

const PORT = process.env.PORT || 3001;
const REPORTS_DIR = path.join(__dirname, 'reports');

// Ensure reports directory exists
fs.mkdir(REPORTS_DIR, { recursive: true }).catch(console.error);

/**
 * Health check endpoint
 */
app.get('/health', (req, res) => {
  res.json({ status: 'healthy', service: 'lighthouse-scanner' });
});

/**
 * Run Lighthouse audit for a given URL
 *
 * POST /scan
 * Body: { "url": "https://example.com" }
 *
 * Returns: Lighthouse audit results as JSON
 */
app.post('/scan', async (req, res) => {
  const { url } = req.body;

  if (!url) {
    return res.status(400).json({ error: 'URL is required' });
  }

  // Validate URL format
  try {
    new URL(url);
  } catch (e) {
    return res.status(400).json({ error: 'Invalid URL format' });
  }

  const scanId = uuidv4();
  console.log(`[${scanId}] Starting Lighthouse scan for: ${url}`);

  let chrome = null;

  try {
    // lighthouse and chrome-launcher are ESM packages, so load them here via
    // dynamic import (Node caches the modules after the first call)
    const { default: lighthouse } = await import('lighthouse');
    const chromeLauncher = await import('chrome-launcher');

    // Launch Chrome
    chrome = await chromeLauncher.launch({
      chromeFlags: [
        '--headless',
        '--disable-gpu',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-extensions',
        '--disable-background-networking',
        '--disable-sync',
        '--disable-translate',
        '--metrics-recording-only',
        '--mute-audio',
        '--no-first-run',
        '--safebrowsing-disable-auto-update'
      ]
    });

    console.log(`[${scanId}] Chrome launched on port ${chrome.port}`);

    // Lighthouse configuration
    const options = {
      logLevel: 'error',
      output: 'json',
      port: chrome.port,
      onlyCategories: ['performance', 'accessibility', 'best-practices', 'seo'],
      // Throttling settings for more realistic results
      throttling: {
        cpuSlowdownMultiplier: 4,
        downloadThroughputKbps: 1638.4,
        uploadThroughputKbps: 675,
        rttMs: 150
      },
      screenEmulation: {
        mobile: false,
        width: 1920,
        height: 1080,
        deviceScaleFactor: 1,
        disabled: false
      },
      formFactor: 'desktop'
    };

    // Run Lighthouse
    const runnerResult = await lighthouse(url, options);

    // Extract the report
    const report = runnerResult.lhr;

    // Process and extract key metrics
    const result = {
      scanId,
      url: report.finalUrl || url,
      fetchTime: report.fetchTime,

      // Category scores (0-100)
      scores: {
        performance: Math.round((report.categories.performance?.score || 0) * 100),
        accessibility: Math.round((report.categories.accessibility?.score || 0) * 100),
        bestPractices: Math.round((report.categories['best-practices']?.score || 0) * 100),
        seo: Math.round((report.categories.seo?.score || 0) * 100)
      },

      // Core Web Vitals and key metrics
      metrics: {
        firstContentfulPaint: {
          value: report.audits['first-contentful-paint']?.numericValue || null,
          unit: 'ms',
          score: report.audits['first-contentful-paint']?.score || null
        },
        largestContentfulPaint: {
          value: report.audits['largest-contentful-paint']?.numericValue || null,
          unit: 'ms',
          score: report.audits['largest-contentful-paint']?.score || null
        },
        speedIndex: {
          value: report.audits['speed-index']?.numericValue || null,
          unit: 'ms',
          score: report.audits['speed-index']?.score || null
        },
        timeToInteractive: {
          value: report.audits['interactive']?.numericValue || null,
          unit: 'ms',
          score: report.audits['interactive']?.score || null
        },
        totalBlockingTime: {
          value: report.audits['total-blocking-time']?.numericValue || null,
          unit: 'ms',
          score: report.audits['total-blocking-time']?.score || null
        },
        cumulativeLayoutShift: {
          value: report.audits['cumulative-layout-shift']?.numericValue || null,
          unit: 'score',
          score: report.audits['cumulative-layout-shift']?.score || null
        }
      },

      // JavaScript and resource audits
      resources: {
        totalByteWeight: report.audits['total-byte-weight']?.numericValue || null,
        bootupTime: report.audits['bootup-time']?.numericValue || null,
        mainThreadWork: report.audits['mainthread-work-breakdown']?.numericValue || null,

        // Unused resources
        unusedJavascript: extractUnusedResources(report.audits['unused-javascript']),
        unusedCss: extractUnusedResources(report.audits['unused-css-rules']),

        // Render blocking resources
        renderBlockingResources: extractRenderBlockingResources(report.audits['render-blocking-resources']),

        // Large bundles
        scriptTreemap: extractLargeScripts(report.audits['script-treemap-data']),

        // Third party usage
        thirdPartySummary: extractThirdPartySummary(report.audits['third-party-summary'])
      },

      // Diagnostics
      diagnostics: {
        numRequests: report.audits['network-requests']?.details?.items?.length || 0,
        numScripts: countResourcesByType(report.audits['network-requests'], 'Script'),
        numStylesheets: countResourcesByType(report.audits['network-requests'], 'Stylesheet'),
        numImages: countResourcesByType(report.audits['network-requests'], 'Image'),
        numFonts: countResourcesByType(report.audits['network-requests'], 'Font'),
        totalTransferSize: report.audits['total-byte-weight']?.numericValue || 0
      },

      // Failed audits (potential issues)
      issues: extractFailedAudits(report)
    };

    // Save full report to file for debugging
    const reportPath = path.join(REPORTS_DIR, `${scanId}.json`);
    await fs.writeFile(reportPath, JSON.stringify(report, null, 2));

    console.log(`[${scanId}] Scan completed successfully`);
    res.json(result);

  } catch (error) {
    console.error(`[${scanId}] Scan failed:`, error);
    res.status(500).json({
      error: 'Lighthouse scan failed',
      message: error.message,
      scanId
    });
  } finally {
    if (chrome) {
      await chrome.kill();
    }
  }
});
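
// Example interaction with the endpoint above (illustrative only; the numbers
// naturally vary between runs):
//
//   curl -X POST http://localhost:3001/scan \
//        -H 'Content-Type: application/json' \
//        -d '{"url": "https://example.com"}'
//
// responds with a JSON body shaped like:
//
//   {
//     "scanId": "<uuid>",
//     "url": "https://example.com/",
//     "scores": { "performance": 97, "accessibility": 100, "bestPractices": 96, "seo": 100 },
//     "metrics": { "firstContentfulPaint": { "value": 812.4, "unit": "ms", "score": 0.99 }, ... },
//     "resources": { ... },
//     "diagnostics": { ... },
//     "issues": [ ... ]
//   }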

/**
 * Get a saved report by ID
 */
app.get('/report/:scanId', async (req, res) => {
  const { scanId } = req.params;
  const reportPath = path.join(REPORTS_DIR, `${scanId}.json`);

  try {
    const report = await fs.readFile(reportPath, 'utf8');
    res.json(JSON.parse(report));
  } catch (error) {
    res.status(404).json({ error: 'Report not found' });
  }
});

// =============================================================================
// Helper Functions
// =============================================================================

function extractUnusedResources(audit) {
  if (!audit?.details?.items) return [];

  return audit.details.items.slice(0, 10).map(item => ({
    url: item.url,
    totalBytes: item.totalBytes,
    wastedBytes: item.wastedBytes,
    wastedPercent: item.wastedPercent
  }));
}

function extractRenderBlockingResources(audit) {
  if (!audit?.details?.items) return [];

  return audit.details.items.map(item => ({
    url: item.url,
    wastedMs: item.wastedMs,
    totalBytes: item.totalBytes
  }));
}

function extractLargeScripts(audit) {
  if (!audit?.details?.nodes) return [];

  // Get scripts larger than 100KB
  const largeScripts = [];
  const processNode = (node, path = '') => {
    const currentPath = path ? `${path}/${node.name}` : node.name;

    if (node.resourceBytes > 100 * 1024) {
      largeScripts.push({
        name: currentPath,
        resourceBytes: node.resourceBytes,
        unusedBytes: node.unusedBytes || 0
      });
    }

    if (node.children) {
      node.children.forEach(child => processNode(child, currentPath));
    }
  };

  audit.details.nodes.forEach(node => processNode(node));
  return largeScripts.slice(0, 20);
}

function extractThirdPartySummary(audit) {
  if (!audit?.details?.items) return [];

  return audit.details.items.slice(0, 10).map(item => ({
    entity: item.entity,
    transferSize: item.transferSize,
    blockingTime: item.blockingTime,
    mainThreadTime: item.mainThreadTime
  }));
}

function countResourcesByType(audit, type) {
  if (!audit?.details?.items) return 0;
  return audit.details.items.filter(item => item.resourceType === type).length;
}

function extractFailedAudits(report) {
  const issues = [];

  const categoriesToCheck = ['performance', 'accessibility', 'best-practices', 'seo'];

  categoriesToCheck.forEach(categoryId => {
    const category = report.categories[categoryId];
    if (!category?.auditRefs) return;

    category.auditRefs.forEach(ref => {
      const audit = report.audits[ref.id];

      // Include audits with score < 0.5 (50%)
      if (audit && audit.score !== null && audit.score < 0.5) {
        issues.push({
          id: audit.id,
          category: categoryId,
          title: audit.title,
          description: audit.description,
          score: audit.score,
          displayValue: audit.displayValue,
          impact: ref.weight || 0
        });
      }
    });
  });

  // Sort by impact (weight) descending
  issues.sort((a, b) => b.impact - a.impact);

  return issues.slice(0, 30);
}

// Start the server
app.listen(PORT, '0.0.0.0', () => {
  console.log(`Lighthouse Scanner Service running on port ${PORT}`);
});
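
// Design note: every POST /scan launches and tears down its own Chromium
// instance, so each concurrent request costs a full browser; keeping the
// number of simultaneous scans bounded is left to the caller (in this stack,
// presumably the Django/Celery side).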