Initial commit: Lighthouse scanner service

commit 90ad47a721

@@ -0,0 +1,27 @@
# Django Core Settings
DEBUG=True
SECRET_KEY=your-secret-key-change-in-production-abc123xyz789
ALLOWED_HOSTS=localhost,127.0.0.1,web

# Database
DATABASE_URL=postgres://analyzer:analyzer_password@db:5432/website_analyzer

# Redis & Celery
REDIS_URL=redis://redis:6379/0
CELERY_BROKER_URL=redis://redis:6379/0
CELERY_RESULT_BACKEND=redis://redis:6379/1

# OWASP ZAP Configuration
ZAP_API_KEY=zap-api-key-change-me
ZAP_HOST=http://zap:8080

# Lighthouse Configuration
LIGHTHOUSE_CHROME_FLAGS=--headless --no-sandbox --disable-gpu

# Scan Settings
MAX_SCAN_TIME_SECONDS=300
SCAN_RATE_LIMIT_MINUTES=5
MAX_CONCURRENT_SCANS=3

# Security
CORS_ALLOWED_ORIGINS=http://localhost:3000,http://localhost:8000

@@ -0,0 +1,84 @@
# Website Analyzer Backend - Dockerfile
# Multi-stage build for efficient image size

FROM python:3.11-slim as builder

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    libpq-dev \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies
COPY requirements.txt .
RUN pip install --user -r requirements.txt

# Install Playwright and its dependencies
RUN pip install --user playwright && \
    python -m playwright install chromium && \
    python -m playwright install-deps chromium

# ==========================================================================
# Production Stage
# ==========================================================================
FROM python:3.11-slim

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PATH="/root/.local/bin:$PATH"

WORKDIR /app

# Install runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    libpq5 \
    curl \
    # Playwright/Chromium dependencies
    libnss3 \
    libnspr4 \
    libatk1.0-0 \
    libatk-bridge2.0-0 \
    libcups2 \
    libdrm2 \
    libdbus-1-3 \
    libxkbcommon0 \
    libxcomposite1 \
    libxdamage1 \
    libxfixes3 \
    libxrandr2 \
    libgbm1 \
    libasound2 \
    libpango-1.0-0 \
    libcairo2 \
    libatspi2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Copy Python packages from builder
COPY --from=builder /root/.local /root/.local
COPY --from=builder /root/.cache/ms-playwright /root/.cache/ms-playwright

# Copy application code
COPY . .

# Create logs directory
RUN mkdir -p logs staticfiles

# Create non-root user for security
RUN useradd -m -u 1000 appuser && \
    chown -R appuser:appuser /app /root/.local /root/.cache
USER appuser

# Expose port
EXPOSE 8000

# Default command
CMD ["gunicorn", "core.wsgi:application", "--bind", "0.0.0.0:8000", "--workers", "4"]

@@ -0,0 +1,5 @@
"""
API app initialization.
"""

default_app_config = 'api.apps.ApiConfig'

@@ -0,0 +1,11 @@
"""
API app configuration.
"""

from django.apps import AppConfig


class ApiConfig(AppConfig):
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'api'
    verbose_name = 'REST API'

@@ -0,0 +1,52 @@
"""
Custom exception handler for DRF.
"""

from rest_framework.views import exception_handler
from rest_framework.response import Response
from rest_framework import status
import logging

logger = logging.getLogger(__name__)


def custom_exception_handler(exc, context):
    """
    Custom exception handler that provides consistent error responses.

    Handles common exceptions and formats them consistently.
    """
    # Call REST framework's default exception handler first
    response = exception_handler(exc, context)

    if response is not None:
        # Customize the response data
        custom_response_data = {
            'error': True,
            'status_code': response.status_code,
        }

        if isinstance(response.data, dict):
            if 'detail' in response.data:
                custom_response_data['message'] = str(response.data['detail'])
            else:
                custom_response_data['errors'] = response.data
        elif isinstance(response.data, list):
            custom_response_data['errors'] = response.data
        else:
            custom_response_data['message'] = str(response.data)

        response.data = custom_response_data
        return response

    # Handle unexpected exceptions
    logger.exception(f"Unhandled exception: {exc}")

    return Response(
        {
            'error': True,
            'status_code': 500,
            'message': 'An unexpected error occurred',
        },
        status=status.HTTP_500_INTERNAL_SERVER_ERROR
    )

@@ -0,0 +1,243 @@
"""
DRF Serializers for the API.

This module defines serializers for converting model instances
to JSON and validating input data.
"""

from rest_framework import serializers
from websites.models import Website, Scan, Issue, Metric, ScanStatus


class IssueSerializer(serializers.ModelSerializer):
    """Serializer for Issue model."""

    severity_display = serializers.CharField(source='get_severity_display', read_only=True)
    category_display = serializers.CharField(source='get_category_display', read_only=True)
    tool_display = serializers.CharField(source='get_tool_display', read_only=True)

    class Meta:
        model = Issue
        fields = [
            'id',
            'category',
            'category_display',
            'severity',
            'severity_display',
            'tool',
            'tool_display',
            'title',
            'description',
            'affected_url',
            'remediation',
            'created_at',
        ]
        read_only_fields = fields


class MetricSerializer(serializers.ModelSerializer):
    """Serializer for Metric model."""

    formatted_value = serializers.CharField(source='get_formatted_value', read_only=True)
    unit_display = serializers.CharField(source='get_unit_display', read_only=True)

    class Meta:
        model = Metric
        fields = [
            'id',
            'name',
            'display_name',
            'value',
            'unit',
            'unit_display',
            'formatted_value',
            'source',
            'score',
        ]
        read_only_fields = fields


class ScanListSerializer(serializers.ModelSerializer):
    """Serializer for Scan list views (minimal data)."""

    status_display = serializers.CharField(source='get_status_display', read_only=True)
    website_url = serializers.CharField(source='website.url', read_only=True)
    issues_count = serializers.SerializerMethodField()

    class Meta:
        model = Scan
        fields = [
            'id',
            'website_url',
            'status',
            'status_display',
            'created_at',
            'completed_at',
            'overall_score',
            'performance_score',
            'security_score',
            'issues_count',
        ]
        read_only_fields = fields

    def get_issues_count(self, obj):
        return obj.issues.count()


class ScanDetailSerializer(serializers.ModelSerializer):
    """Serializer for Scan detail views (full data)."""

    status_display = serializers.CharField(source='get_status_display', read_only=True)
    website_url = serializers.CharField(source='website.url', read_only=True)
    website_domain = serializers.CharField(source='website.domain', read_only=True)
    issues = IssueSerializer(many=True, read_only=True)
    metrics = MetricSerializer(many=True, read_only=True)
    issues_by_category = serializers.SerializerMethodField()
    issues_by_severity = serializers.SerializerMethodField()

    class Meta:
        model = Scan
        fields = [
            'id',
            'website_url',
            'website_domain',
            'status',
            'status_display',
            'created_at',
            'started_at',
            'completed_at',
            'overall_score',
            'performance_score',
            'accessibility_score',
            'seo_score',
            'best_practices_score',
            'security_score',
            'error_message',
            'issues',
            'metrics',
            'issues_by_category',
            'issues_by_severity',
        ]
        read_only_fields = fields

    def get_issues_by_category(self, obj):
        """Group issues by category."""
        from collections import defaultdict
        grouped = defaultdict(list)

        for issue in obj.issues.all():
            grouped[issue.category].append(IssueSerializer(issue).data)

        return dict(grouped)

    def get_issues_by_severity(self, obj):
        """Count issues by severity."""
        from django.db.models import Count

        counts = obj.issues.values('severity').annotate(count=Count('id'))
        return {item['severity']: item['count'] for item in counts}


class ScanCreateSerializer(serializers.Serializer):
    """Serializer for creating new scans."""

    url = serializers.URLField(
        required=True,
        help_text="The URL to scan (must be http or https)"
    )

    def validate_url(self, value):
        """Validate and normalize the URL."""
        from scanner.utils import validate_url

        is_valid, result = validate_url(value)

        if not is_valid:
            raise serializers.ValidationError(result)

        return result  # Return normalized URL

    def create(self, validated_data):
        """Create Website and Scan records."""
        from scanner.tasks import check_rate_limit, check_concurrent_scan_limit, run_scan_task

        url = validated_data['url']

        # Check rate limit
        rate_limit_error = check_rate_limit(url)
        if rate_limit_error:
            raise serializers.ValidationError({'url': rate_limit_error})

        # Check concurrent scan limit
        concurrent_error = check_concurrent_scan_limit()
        if concurrent_error:
            raise serializers.ValidationError({'non_field_errors': concurrent_error})

        # Get or create Website
        website, created = Website.objects.get_or_create(
            url=url,
            defaults={'domain': validated_data.get('domain', '')}
        )

        # Create Scan
        scan = Scan.objects.create(
            website=website,
            status=ScanStatus.PENDING
        )

        # Trigger Celery task
        task = run_scan_task.delay(str(scan.id))

        # Update scan with task ID
        scan.celery_task_id = task.id
        scan.save(update_fields=['celery_task_id'])

        return scan


class WebsiteSerializer(serializers.ModelSerializer):
    """Serializer for Website model."""

    scans_count = serializers.SerializerMethodField()
    latest_scan = serializers.SerializerMethodField()

    class Meta:
        model = Website
        fields = [
            'id',
            'url',
            'domain',
            'created_at',
            'last_scanned_at',
            'scans_count',
            'latest_scan',
        ]
        read_only_fields = fields

    def get_scans_count(self, obj):
        return obj.scans.count()

    def get_latest_scan(self, obj):
        latest = obj.scans.first()
        if latest:
            return ScanListSerializer(latest).data
        return None


class WebsiteDetailSerializer(WebsiteSerializer):
    """Detailed Website serializer with scan list."""

    scans = ScanListSerializer(many=True, read_only=True)

    class Meta(WebsiteSerializer.Meta):
        fields = WebsiteSerializer.Meta.fields + ['scans']


class HealthCheckSerializer(serializers.Serializer):
    """Serializer for health check response."""

    status = serializers.CharField()
    database = serializers.CharField()
    redis = serializers.CharField()
    celery = serializers.CharField()
    timestamp = serializers.DateTimeField()

@@ -0,0 +1,18 @@
"""
URL routing for the API.
"""

from django.urls import path, include
from rest_framework.routers import DefaultRouter
from . import views

router = DefaultRouter()
router.register(r'scans', views.ScanViewSet, basename='scan')
router.register(r'websites', views.WebsiteViewSet, basename='website')
router.register(r'issues', views.IssueViewSet, basename='issue')

urlpatterns = [
    path('', views.api_root, name='api-root'),
    path('health/', views.health_check, name='health-check'),
    path('', include(router.urls)),
]

@@ -0,0 +1,336 @@
"""
DRF Views for the API.

This module defines API views for scans, websites, and issues.
"""

import logging
from django.db import connection
from django.utils import timezone
from django.core.cache import cache
from rest_framework import viewsets, status, generics
from rest_framework.decorators import api_view, action
from rest_framework.response import Response
from rest_framework.pagination import PageNumberPagination
from rest_framework.throttling import AnonRateThrottle

from websites.models import Website, Scan, Issue, Metric
from .serializers import (
    WebsiteSerializer,
    WebsiteDetailSerializer,
    ScanListSerializer,
    ScanDetailSerializer,
    ScanCreateSerializer,
    IssueSerializer,
    MetricSerializer,
    HealthCheckSerializer,
)

logger = logging.getLogger(__name__)


class ScanRateThrottle(AnonRateThrottle):
    """Custom throttle for scan creation."""
    rate = '10/hour'


class StandardResultsPagination(PageNumberPagination):
    """Standard pagination for list views."""
    page_size = 20
    page_size_query_param = 'page_size'
    max_page_size = 100


class ScanViewSet(viewsets.ModelViewSet):
    """
    ViewSet for Scan operations.

    Endpoints:
    - POST /api/scans/ - Create a new scan
    - GET /api/scans/ - List all scans
    - GET /api/scans/{id}/ - Get scan details
    - DELETE /api/scans/{id}/ - Delete a scan
    """

    queryset = Scan.objects.select_related('website').prefetch_related('issues', 'metrics')
    pagination_class = StandardResultsPagination

    def get_serializer_class(self):
        if self.action == 'list':
            return ScanListSerializer
        elif self.action == 'create':
            return ScanCreateSerializer
        return ScanDetailSerializer

    def get_throttles(self):
        if self.action == 'create':
            return [ScanRateThrottle()]
        return super().get_throttles()

    def create(self, request, *args, **kwargs):
        """
        Create a new scan.

        Request body:
        ```json
        {"url": "https://example.com"}
        ```

        Returns the created scan with pending status.
        The scan will be processed asynchronously.
        """
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        try:
            scan = serializer.save()

            # Return the created scan details
            response_serializer = ScanDetailSerializer(scan)
            return Response(
                response_serializer.data,
                status=status.HTTP_201_CREATED
            )
        except Exception as e:
            logger.exception("Error creating scan")
            return Response(
                {'error': str(e)},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR
            )

    @action(detail=True, methods=['get'])
    def issues(self, request, pk=None):
        """Get all issues for a scan."""
        scan = self.get_object()
        issues = scan.issues.all()

        # Optional filtering
        category = request.query_params.get('category')
        severity = request.query_params.get('severity')
        tool = request.query_params.get('tool')

        if category:
            issues = issues.filter(category=category)
        if severity:
            issues = issues.filter(severity=severity)
        if tool:
            issues = issues.filter(tool=tool)

        serializer = IssueSerializer(issues, many=True)
        return Response(serializer.data)

    @action(detail=True, methods=['get'])
    def metrics(self, request, pk=None):
        """Get all metrics for a scan."""
        scan = self.get_object()
        metrics = scan.metrics.all()

        # Optional filtering by source
        source = request.query_params.get('source')
        if source:
            metrics = metrics.filter(source=source)

        serializer = MetricSerializer(metrics, many=True)
        return Response(serializer.data)

    @action(detail=True, methods=['get'])
    def status(self, request, pk=None):
        """Get just the status of a scan (for polling)."""
        scan = self.get_object()
        return Response({
            'id': str(scan.id),
            'status': scan.status,
            'status_display': scan.get_status_display(),
            'progress': self._get_scan_progress(scan),
        })

    def _get_scan_progress(self, scan):
        """Estimate scan progress based on status and results."""
        if scan.status == 'done':
            return 100
        elif scan.status == 'failed':
            return 0
        elif scan.status == 'running':
            # Estimate based on what data we have
            progress = 10  # Started
            if scan.raw_headers_data:
                progress += 20
            if scan.raw_playwright_data:
                progress += 25
            if scan.raw_lighthouse_data:
                progress += 30
            if scan.raw_zap_data:
                progress += 15
            return min(progress, 95)
        return 0


class WebsiteViewSet(viewsets.ReadOnlyModelViewSet):
    """
    ViewSet for Website operations.

    Endpoints:
    - GET /api/websites/ - List all websites
    - GET /api/websites/{id}/ - Get website details
    - GET /api/websites/{id}/scans/ - Get scans for a website
    """

    queryset = Website.objects.prefetch_related('scans')
    pagination_class = StandardResultsPagination

    def get_serializer_class(self):
        if self.action == 'retrieve':
            return WebsiteDetailSerializer
        return WebsiteSerializer

    @action(detail=True, methods=['get'])
    def scans(self, request, pk=None):
        """Get all scans for a website."""
        website = self.get_object()
        scans = website.scans.all()

        # Apply pagination
        page = self.paginate_queryset(scans)
        if page is not None:
            serializer = ScanListSerializer(page, many=True)
            return self.get_paginated_response(serializer.data)

        serializer = ScanListSerializer(scans, many=True)
        return Response(serializer.data)


class IssueViewSet(viewsets.ReadOnlyModelViewSet):
    """
    ViewSet for Issue operations.

    Endpoints:
    - GET /api/issues/ - List all issues (with filtering)
    - GET /api/issues/{id}/ - Get issue details
    """

    queryset = Issue.objects.select_related('scan', 'scan__website')
    serializer_class = IssueSerializer
    pagination_class = StandardResultsPagination

    def get_queryset(self):
        queryset = super().get_queryset()

        # Filter by scan
        scan_id = self.request.query_params.get('scan')
        if scan_id:
            queryset = queryset.filter(scan_id=scan_id)

        # Filter by category
        category = self.request.query_params.get('category')
        if category:
            queryset = queryset.filter(category=category)

        # Filter by severity
        severity = self.request.query_params.get('severity')
        if severity:
            queryset = queryset.filter(severity=severity)

        # Filter by tool
        tool = self.request.query_params.get('tool')
        if tool:
            queryset = queryset.filter(tool=tool)

        return queryset


@api_view(['GET'])
def health_check(request):
    """
    Health check endpoint.

    Checks:
    - Database connectivity
    - Redis connectivity
    - Celery worker status

    Returns health status of all components.
    """
    health = {
        'status': 'healthy',
        'database': 'unknown',
        'redis': 'unknown',
        'celery': 'unknown',
        'timestamp': timezone.now(),
    }

    # Check database
    try:
        connection.ensure_connection()
        health['database'] = 'healthy'
    except Exception as e:
        health['database'] = f'unhealthy: {e}'
        health['status'] = 'unhealthy'

    # Check Redis
    try:
        cache.set('health_check', 'ok', 10)
        if cache.get('health_check') == 'ok':
            health['redis'] = 'healthy'
        else:
            health['redis'] = 'unhealthy: cache not working'
            health['status'] = 'degraded'
    except Exception as e:
        health['redis'] = f'unhealthy: {e}'
        health['status'] = 'degraded'

    # Check Celery (basic check)
    try:
        from core.celery import app as celery_app
        inspect = celery_app.control.inspect()

        # Try to get active workers
        active = inspect.active()
        if active:
            health['celery'] = f'healthy ({len(active)} workers)'
        else:
            health['celery'] = 'degraded: no active workers'
            health['status'] = 'degraded'
    except Exception as e:
        health['celery'] = f'unknown: {e}'

    status_code = 200 if health['status'] == 'healthy' else 503

    serializer = HealthCheckSerializer(health)
    return Response(serializer.data, status=status_code)


@api_view(['GET'])
def api_root(request):
    """
    API root endpoint.

    Returns available endpoints and basic API information.
    """
    return Response({
        'message': 'Website Analyzer API',
        'version': '1.0.0',
        'endpoints': {
            'scans': '/api/scans/',
            'websites': '/api/websites/',
            'issues': '/api/issues/',
            'health': '/api/health/',
        },
        'documentation': {
            'create_scan': {
                'method': 'POST',
                'url': '/api/scans/',
                'body': {'url': 'https://example.com'},
                'description': 'Create a new website scan'
            },
            'get_scan': {
                'method': 'GET',
                'url': '/api/scans/{id}/',
                'description': 'Get scan results and details'
            },
            'list_scans': {
                'method': 'GET',
                'url': '/api/scans/',
                'description': 'List all scans with pagination'
            },
        }
    })

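Taken together, these views define the scan lifecycle a client sees: POST a URL, poll the status action, then fetch the full result. Below is a minimal client-side sketch of that flow, using the httpx dependency already listed in requirements.txt; the base URL, timeout, and polling interval are illustrative assumptions, not part of the commit.

# Hypothetical client sketch: create a scan and poll it until it finishes.
import time
import httpx

BASE = "http://localhost:8000/api"  # assumed host/port, adjust to your deployment

with httpx.Client(timeout=30) as client:
    # POST /api/scans/ with the target URL; the scan comes back in 'pending' state
    created = client.post(f"{BASE}/scans/", json={"url": "https://example.com"})
    created.raise_for_status()
    scan_id = created.json()["id"]

    # Poll the lightweight status action until the scan reaches a terminal state
    while True:
        info = client.get(f"{BASE}/scans/{scan_id}/status/").json()
        print(info["status_display"], f"{info['progress']}%")
        if info["status"] in ("done", "failed"):
            break
        time.sleep(5)  # illustrative polling interval

    # Fetch the full detail serialization, including issues and metrics
    result = client.get(f"{BASE}/scans/{scan_id}/").json()
    print(len(result["issues"]), "issues found")
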
@@ -0,0 +1,9 @@
"""
Core module initialization.

This module loads the Celery app so that shared_task will use this app.
"""

from .celery import app as celery_app

__all__ = ('celery_app',)

@@ -0,0 +1,11 @@
"""
ASGI config for Website Analyzer project.
"""

import os

from django.core.asgi import get_asgi_application

os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')

application = get_asgi_application()

@@ -0,0 +1,28 @@
"""
Celery configuration for Website Analyzer.

This module configures Celery for asynchronous task processing,
specifically for running website scans in the background.
"""

import os

from celery import Celery

# Set the default Django settings module for the 'celery' program.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')

app = Celery('website_analyzer')

# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
app.config_from_object('django.conf:settings', namespace='CELERY')

# Load task modules from all registered Django apps.
app.autodiscover_tasks()


@app.task(bind=True, ignore_result=True)
def debug_task(self):
    """Debug task for testing Celery connectivity."""
    print(f'Request: {self.request!r}')

@@ -0,0 +1,300 @@
"""
Django settings for Website Analyzer project.

This module contains all configuration settings for the Django application,
including database, caching, security, and third-party integrations.
"""

import os
from pathlib import Path

from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent

# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = os.getenv('SECRET_KEY', 'django-insecure-change-me-in-production')

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = os.getenv('DEBUG', 'False').lower() in ('true', '1', 'yes')

ALLOWED_HOSTS = os.getenv('ALLOWED_HOSTS', 'localhost,127.0.0.1').split(',')


# Application definition

INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',

    # Third-party apps
    'rest_framework',
    'corsheaders',

    # Local apps
    'websites',
    'scanner',
    'api',
]

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'whitenoise.middleware.WhiteNoiseMiddleware',
    'corsheaders.middleware.CorsMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'core.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [BASE_DIR / 'templates'],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'core.wsgi.application'


# Database
# Parse DATABASE_URL or use default PostgreSQL settings

DATABASE_URL = os.getenv('DATABASE_URL', 'postgres://analyzer:analyzer_password@localhost:5432/website_analyzer')

# Parse the DATABASE_URL
import re
db_pattern = r'postgres://(?P<user>[^:]+):(?P<password>[^@]+)@(?P<host>[^:]+):(?P<port>\d+)/(?P<name>.+)'
db_match = re.match(db_pattern, DATABASE_URL)

if db_match:
    DATABASES = {
        'default': {
            'ENGINE': 'django.db.backends.postgresql',
            'NAME': db_match.group('name'),
            'USER': db_match.group('user'),
            'PASSWORD': db_match.group('password'),
            'HOST': db_match.group('host'),
            'PORT': db_match.group('port'),
        }
    }
else:
    # Fallback for development
    DATABASES = {
        'default': {
            'ENGINE': 'django.db.backends.sqlite3',
            'NAME': BASE_DIR / 'db.sqlite3',
        }
    }


# Password validation
AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]


# Internationalization
LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'UTC'
USE_I18N = True
USE_TZ = True


# Static files (CSS, JavaScript, Images)
STATIC_URL = 'static/'
STATIC_ROOT = BASE_DIR / 'staticfiles'
STATICFILES_DIRS = [BASE_DIR / 'static']
STATICFILES_STORAGE = 'whitenoise.storage.CompressedManifestStaticFilesStorage'

# Default primary key field type
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'


# =============================================================================
# REST Framework Configuration
# =============================================================================
REST_FRAMEWORK = {
    'DEFAULT_RENDERER_CLASSES': [
        'rest_framework.renderers.JSONRenderer',
        'rest_framework.renderers.BrowsableAPIRenderer',
    ],
    'DEFAULT_PAGINATION_CLASS': 'rest_framework.pagination.PageNumberPagination',
    'PAGE_SIZE': 20,
    'DEFAULT_THROTTLE_CLASSES': [
        'rest_framework.throttling.AnonRateThrottle',
        'rest_framework.throttling.UserRateThrottle'
    ],
    'DEFAULT_THROTTLE_RATES': {
        'anon': '100/hour',
        'user': '1000/hour',
        'scan': '10/hour',  # Specific rate for scan creation
    },
    'EXCEPTION_HANDLER': 'api.exceptions.custom_exception_handler',
}


# =============================================================================
# CORS Configuration
# =============================================================================
CORS_ALLOWED_ORIGINS = os.getenv(
    'CORS_ALLOWED_ORIGINS',
    'http://localhost:3000,http://localhost:8000'
).split(',')
CORS_ALLOW_CREDENTIALS = True


# =============================================================================
# Celery Configuration
# =============================================================================
CELERY_BROKER_URL = os.getenv('CELERY_BROKER_URL', 'redis://localhost:6379/0')
CELERY_RESULT_BACKEND = os.getenv('CELERY_RESULT_BACKEND', 'redis://localhost:6379/1')
CELERY_ACCEPT_CONTENT = ['json']
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
CELERY_TIMEZONE = TIME_ZONE
CELERY_TASK_TRACK_STARTED = True
CELERY_TASK_TIME_LIMIT = int(os.getenv('MAX_SCAN_TIME_SECONDS', '300'))
CELERY_TASK_SOFT_TIME_LIMIT = CELERY_TASK_TIME_LIMIT - 30


# =============================================================================
# Redis Cache Configuration
# =============================================================================
REDIS_URL = os.getenv('REDIS_URL', 'redis://localhost:6379/0')
CACHES = {
    'default': {
        'BACKEND': 'django.core.cache.backends.redis.RedisCache',
        'LOCATION': REDIS_URL,
    }
}


# =============================================================================
# Scanner Configuration
# =============================================================================
SCANNER_CONFIG = {
    # OWASP ZAP settings
    'ZAP_API_KEY': os.getenv('ZAP_API_KEY', ''),
    'ZAP_HOST': os.getenv('ZAP_HOST', 'http://localhost:8080'),
    'ZAP_TIMEOUT': 120,

    # Lighthouse settings
    'LIGHTHOUSE_CHROME_FLAGS': os.getenv(
        'LIGHTHOUSE_CHROME_FLAGS',
        '--headless --no-sandbox --disable-gpu'
    ),
    'LIGHTHOUSE_TIMEOUT': 60,

    # Playwright settings
    'PLAYWRIGHT_TIMEOUT': 30000,  # milliseconds
    'PLAYWRIGHT_VIEWPORT': {'width': 1920, 'height': 1080},

    # General scan settings
    'MAX_SCAN_TIME_SECONDS': int(os.getenv('MAX_SCAN_TIME_SECONDS', '300')),
    'SCAN_RATE_LIMIT_MINUTES': int(os.getenv('SCAN_RATE_LIMIT_MINUTES', '5')),
    'MAX_CONCURRENT_SCANS': int(os.getenv('MAX_CONCURRENT_SCANS', '3')),

    # Safety settings - blocked IP ranges (RFC1918 private ranges + localhost)
    'BLOCKED_IP_RANGES': [
        '10.0.0.0/8',
        '172.16.0.0/12',
        '192.168.0.0/16',
        '127.0.0.0/8',
        '169.254.0.0/16',  # Link-local
        '::1/128',  # IPv6 localhost
        'fc00::/7',  # IPv6 private
        'fe80::/10',  # IPv6 link-local
    ],
    'BLOCKED_HOSTS': ['localhost', 'localhost.localdomain'],

    # Large file thresholds
    'LARGE_IMAGE_THRESHOLD_BYTES': 1024 * 1024,  # 1 MB
    'LARGE_JS_BUNDLE_THRESHOLD_BYTES': 500 * 1024,  # 500 KB
}


# =============================================================================
# Logging Configuration
# =============================================================================
LOGGING = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': {
        'verbose': {
            'format': '{levelname} {asctime} {module} {process:d} {thread:d} {message}',
            'style': '{',
        },
        'simple': {
            'format': '{levelname} {asctime} {module} {message}',
            'style': '{',
        },
    },
    'handlers': {
        'console': {
            'class': 'logging.StreamHandler',
            'formatter': 'simple',
        },
        'file': {
            'class': 'logging.FileHandler',
            'filename': BASE_DIR / 'logs' / 'django.log',
            'formatter': 'verbose',
        },
    },
    'root': {
        'handlers': ['console'],
        'level': 'INFO',
    },
    'loggers': {
        'django': {
            'handlers': ['console'],
            'level': os.getenv('DJANGO_LOG_LEVEL', 'INFO'),
            'propagate': False,
        },
        'scanner': {
            'handlers': ['console'],
            'level': 'DEBUG' if DEBUG else 'INFO',
            'propagate': False,
        },
        'celery': {
            'handlers': ['console'],
            'level': 'INFO',
            'propagate': False,
        },
    },
}

# Create logs directory if it doesn't exist
(BASE_DIR / 'logs').mkdir(exist_ok=True)

@@ -0,0 +1,20 @@
"""
URL configuration for Website Analyzer project.
"""

from django.contrib import admin
from django.urls import path, include
from django.views.generic import TemplateView


urlpatterns = [
    # Admin
    path('admin/', admin.site.urls),

    # API endpoints
    path('api/', include('api.urls')),

    # Frontend views
    path('', TemplateView.as_view(template_name='index.html'), name='home'),
    path('scan/<uuid:scan_id>/', TemplateView.as_view(template_name='scan_detail.html'), name='scan_detail'),
]

@@ -0,0 +1,11 @@
"""
WSGI config for Website Analyzer project.
"""

import os

from django.core.wsgi import get_wsgi_application

os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')

application = get_wsgi_application()

@@ -0,0 +1,22 @@
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys


def main():
    """Run administrative tasks."""
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        raise ImportError(
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        ) from exc
    execute_from_command_line(sys.argv)


if __name__ == '__main__':
    main()

@ -0,0 +1,91 @@
|
||||||
|
[build-system]
|
||||||
|
requires = ["setuptools>=61.0"]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "website-analyzer"
|
||||||
|
version = "1.0.0"
|
||||||
|
description = "A Django-based web application for analyzing website performance, security, and best practices"
|
||||||
|
readme = "README.md"
|
||||||
|
license = {text = "MIT"}
|
||||||
|
requires-python = ">=3.11"
|
||||||
|
authors = [
|
||||||
|
{name = "Website Analyzer Team"}
|
||||||
|
]
|
||||||
|
classifiers = [
|
||||||
|
"Development Status :: 4 - Beta",
|
||||||
|
"Framework :: Django :: 5.0",
|
||||||
|
"Intended Audience :: Developers",
|
||||||
|
"License :: OSI Approved :: MIT License",
|
||||||
|
"Operating System :: OS Independent",
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
"Programming Language :: Python :: 3.11",
|
||||||
|
"Programming Language :: Python :: 3.12",
|
||||||
|
]
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
"Django>=5.0,<6.0",
|
||||||
|
"djangorestframework>=3.14.0",
|
||||||
|
"django-cors-headers>=4.3.0",
|
||||||
|
"psycopg2-binary>=2.9.9",
|
||||||
|
"celery[redis]>=5.3.0",
|
||||||
|
"redis>=5.0.0",
|
||||||
|
"httpx>=0.26.0",
|
||||||
|
"playwright>=1.40.0",
|
||||||
|
"python-dotenv>=1.0.0",
|
||||||
|
"gunicorn>=21.2.0",
|
||||||
|
"whitenoise>=6.6.0",
|
||||||
|
"validators>=0.22.0",
|
||||||
|
"ipaddress>=1.0.23",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
dev = [
|
||||||
|
"pytest>=7.4.0",
|
||||||
|
"pytest-django>=4.7.0",
|
||||||
|
"pytest-asyncio>=0.23.0",
|
||||||
|
"pytest-cov>=4.1.0",
|
||||||
|
"black>=23.12.0",
|
||||||
|
"isort>=5.13.0",
|
||||||
|
"flake8>=7.0.0",
|
||||||
|
"mypy>=1.8.0",
|
||||||
|
"django-stubs>=4.2.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.black]
|
||||||
|
line-length = 100
|
||||||
|
target-version = ['py311']
|
||||||
|
include = '\.pyi?$'
|
||||||
|
exclude = '''
|
||||||
|
/(
|
||||||
|
\.git
|
||||||
|
| \.hg
|
||||||
|
| \.mypy_cache
|
||||||
|
| \.tox
|
||||||
|
| \.venv
|
||||||
|
| _build
|
||||||
|
| buck-out
|
||||||
|
| build
|
||||||
|
| dist
|
||||||
|
| migrations
|
||||||
|
)/
|
||||||
|
'''
|
||||||
|
|
||||||
|
[tool.isort]
|
||||||
|
profile = "black"
|
||||||
|
line_length = 100
|
||||||
|
skip = ["migrations", ".venv"]
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
DJANGO_SETTINGS_MODULE = "core.settings"
|
||||||
|
python_files = ["test_*.py", "*_test.py"]
|
||||||
|
addopts = "-v --tb=short"
|
||||||
|
|
||||||
|
[tool.mypy]
|
||||||
|
python_version = "3.11"
|
||||||
|
plugins = ["mypy_django_plugin.main"]
|
||||||
|
ignore_missing_imports = true
|
||||||
|
strict = false
|
||||||
|
|
||||||
|
[tool.django-stubs]
|
||||||
|
django_settings_module = "core.settings"
|
||||||
|
|
@@ -0,0 +1,36 @@
# Django & REST Framework
Django>=5.0,<6.0
djangorestframework>=3.14.0
django-cors-headers>=4.3.0

# Database
psycopg2-binary>=2.9.9

# Async Task Queue
celery[redis]>=5.3.0
redis>=5.0.0

# HTTP Client
httpx>=0.26.0

# Browser Automation
playwright>=1.40.0

# Environment & Config
python-dotenv>=1.0.0

# Production Server
gunicorn>=21.2.0
whitenoise>=6.6.0

# Validation & Utilities
validators>=0.22.0

# Development & Testing
pytest>=7.4.0
pytest-django>=4.7.0
pytest-asyncio>=0.23.0
pytest-cov>=4.1.0
black>=23.12.0
isort>=5.13.0
flake8>=7.0.0

@@ -0,0 +1,5 @@
"""
Scanner app initialization.
"""

default_app_config = 'scanner.apps.ScannerConfig'

@@ -0,0 +1,11 @@
"""
Scanner app configuration.
"""

from django.apps import AppConfig


class ScannerConfig(AppConfig):
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'scanner'
    verbose_name = 'Scanner Tools'

@@ -0,0 +1,25 @@
"""
Scanner modules initialization.

This package contains the various scanner implementations
that analyze websites for performance, security, and best practices.
"""

from .base import BaseScanner, ScannerResult
from .lighthouse import LighthouseScanner
from .playwright_scanner import PlaywrightScanner
from .zap import ZAPScanner
from .headers import HeaderScanner
from .tls import TLSScanner
from .runner import ScanRunner

__all__ = [
    'BaseScanner',
    'ScannerResult',
    'LighthouseScanner',
    'PlaywrightScanner',
    'ZAPScanner',
    'HeaderScanner',
    'TLSScanner',
    'ScanRunner',
]

@@ -0,0 +1,161 @@
"""
Base scanner interface and result structures.

All scanner implementations should inherit from BaseScanner
and return ScannerResult objects.
"""

import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
from enum import Enum

logger = logging.getLogger(__name__)


class ScannerStatus(str, Enum):
    """Status of a scanner execution."""
    SUCCESS = "success"
    PARTIAL = "partial"
    FAILED = "failed"
    SKIPPED = "skipped"


@dataclass
class IssueData:
    """
    Represents a single issue found by a scanner.

    Attributes:
        category: Issue category (security, performance, etc.)
        severity: Issue severity (critical, high, medium, low, info)
        title: Brief title of the issue
        description: Detailed description
        tool: The scanner that found this issue
        affected_url: Specific URL affected (optional)
        remediation: Suggested fix (optional)
        raw_data: Original scanner data (optional)
    """
    category: str
    severity: str
    title: str
    description: str
    tool: str
    affected_url: Optional[str] = None
    remediation: Optional[str] = None
    raw_data: Optional[Dict[str, Any]] = None


@dataclass
class MetricData:
    """
    Represents a single metric measured by a scanner.

    Attributes:
        name: Internal name (e.g., 'first_contentful_paint_ms')
        display_name: Human-readable name
        value: Numeric value
        unit: Unit of measurement
        source: The scanner that measured this
        score: Normalized score (0-1) if available
    """
    name: str
    display_name: str
    value: float
    unit: str
    source: str
    score: Optional[float] = None


@dataclass
class ScannerResult:
    """
    Result of a scanner execution.

    Attributes:
        scanner_name: Name of the scanner
        status: Execution status
        issues: List of issues found
        metrics: List of metrics measured
        scores: Dictionary of category scores
        raw_data: Original scanner output
        error_message: Error details if failed
    """
    scanner_name: str
    status: ScannerStatus
    issues: List[IssueData] = field(default_factory=list)
    metrics: List[MetricData] = field(default_factory=list)
    scores: Dict[str, int] = field(default_factory=dict)
    raw_data: Optional[Dict[str, Any]] = None
    error_message: Optional[str] = None


class BaseScanner(ABC):
    """
    Abstract base class for all scanners.

    Each scanner implementation must implement the `run` method
    which performs the actual scan and returns a ScannerResult.
    """

    name: str = "base"

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Initialize the scanner with optional configuration.

        Args:
            config: Scanner-specific configuration dictionary
        """
        self.config = config or {}
        self.logger = logging.getLogger(f"scanner.{self.name}")

    @abstractmethod
    def run(self, url: str) -> ScannerResult:
        """
        Run the scanner against the given URL.

        Args:
            url: The URL to scan

        Returns:
            ScannerResult with findings, metrics, and status
        """
        pass

    def is_available(self) -> bool:
        """
        Check if the scanner service/tool is available.

        Returns:
            True if the scanner can be used, False otherwise
        """
        return True

    def _create_error_result(self, error: Exception) -> ScannerResult:
        """
        Create a failed result from an exception.

        Args:
            error: The exception that occurred

        Returns:
            ScannerResult with failed status
        """
        self.logger.error(f"Scanner {self.name} failed: {error}")
        return ScannerResult(
            scanner_name=self.name,
            status=ScannerStatus.FAILED,
            error_message=str(error),
            issues=[
                IssueData(
                    category="scanner",
                    severity="info",
                    title=f"{self.name.title()} scan failed",
                    description=f"The {self.name} scanner encountered an error: {error}",
                    tool=self.name,
                    remediation="Check scanner service configuration and availability."
                )
            ]
        )

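To illustrate how this interface is meant to be extended, here is a hypothetical scanner subclass (not one of the scanners added in this commit): it only sets name, implements run, and reuses the shared _create_error_result helper. The import path and the response-time metric are illustrative assumptions.

# Hypothetical example of a BaseScanner subclass, for illustration only.
import time

import httpx

from scanner.scanners.base import (  # assumed import path for this package
    BaseScanner, MetricData, ScannerResult, ScannerStatus,
)


class ResponseTimeScanner(BaseScanner):
    """Toy scanner that records how long a GET request to the URL takes."""

    name = "response_time"

    def run(self, url: str) -> ScannerResult:
        try:
            start = time.monotonic()
            httpx.get(url, timeout=self.config.get('timeout', 30))
            elapsed_ms = (time.monotonic() - start) * 1000

            return ScannerResult(
                scanner_name=self.name,
                status=ScannerStatus.SUCCESS,
                metrics=[MetricData(
                    name='response_time_ms',
                    display_name='Response Time',
                    value=elapsed_ms,
                    unit='ms',
                    source=self.name,
                )],
            )
        except Exception as e:
            # Failures are reported uniformly via the base-class helper
            return self._create_error_result(e)
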
@ -0,0 +1,405 @@
|
||||||
|
"""
|
||||||
|
HTTP Header Security Scanner.
|
||||||
|
|
||||||
|
This module analyzes HTTP response headers for security
|
||||||
|
best practices and common misconfigurations.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from .base import (
|
||||||
|
BaseScanner,
|
||||||
|
ScannerResult,
|
||||||
|
ScannerStatus,
|
||||||
|
IssueData,
|
||||||
|
MetricData,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Security header definitions with expected values and severity
|
||||||
|
SECURITY_HEADERS = {
|
||||||
|
'Strict-Transport-Security': {
|
||||||
|
'severity': 'high',
|
||||||
|
'description': 'HTTP Strict Transport Security (HSTS) forces browsers to use HTTPS.',
|
||||||
|
'remediation': (
|
||||||
|
'Add the header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload'
|
||||||
|
),
|
||||||
|
'check_value': lambda v: 'max-age' in v.lower() and int(
|
||||||
|
v.lower().split('max-age=')[1].split(';')[0].strip()
|
||||||
|
) >= 31536000 if 'max-age=' in v.lower() else False,
|
||||||
|
},
|
||||||
|
'Content-Security-Policy': {
|
||||||
|
'severity': 'high',
|
||||||
|
'description': 'Content Security Policy (CSP) helps prevent XSS and data injection attacks.',
|
||||||
|
'remediation': (
|
||||||
|
"Implement a Content-Security-Policy header that restricts sources for scripts, "
|
||||||
|
"styles, and other resources. Start with a report-only policy to identify issues."
|
||||||
|
),
|
||||||
|
'check_value': lambda v: "default-src" in v.lower() or "script-src" in v.lower(),
|
||||||
|
},
|
||||||
|
'X-Content-Type-Options': {
|
||||||
|
'severity': 'medium',
|
||||||
|
'description': 'Prevents browsers from MIME-sniffing responses.',
|
||||||
|
'remediation': 'Add the header: X-Content-Type-Options: nosniff',
|
||||||
|
'check_value': lambda v: v.lower() == 'nosniff',
|
||||||
|
},
|
||||||
|
'X-Frame-Options': {
|
||||||
|
'severity': 'medium',
|
||||||
|
'description': 'Protects against clickjacking by controlling page framing.',
|
||||||
|
'remediation': 'Add the header: X-Frame-Options: DENY or SAMEORIGIN',
|
||||||
|
'check_value': lambda v: v.upper() in ['DENY', 'SAMEORIGIN'],
|
||||||
|
},
|
||||||
|
'Referrer-Policy': {
|
||||||
|
'severity': 'low',
|
||||||
|
'description': 'Controls how much referrer information is sent with requests.',
|
||||||
|
'remediation': (
|
||||||
|
'Add the header: Referrer-Policy: strict-origin-when-cross-origin '
|
||||||
|
'or no-referrer-when-downgrade'
|
||||||
|
),
|
||||||
|
'check_value': lambda v: v.lower() in [
|
||||||
|
'no-referrer', 'no-referrer-when-downgrade',
|
||||||
|
'strict-origin', 'strict-origin-when-cross-origin',
|
||||||
|
'same-origin', 'origin', 'origin-when-cross-origin'
|
||||||
|
],
|
||||||
|
},
|
||||||
|
'Permissions-Policy': {
|
||||||
|
'severity': 'low',
|
||||||
|
'description': 'Controls which browser features can be used.',
|
||||||
|
'remediation': (
|
||||||
|
'Add a Permissions-Policy header to restrict access to sensitive browser APIs '
|
||||||
|
'like geolocation, camera, and microphone.'
|
||||||
|
),
|
||||||
|
'check_value': lambda v: len(v) > 0,
|
||||||
|
},
|
||||||
|
'X-XSS-Protection': {
|
||||||
|
'severity': 'info',
|
||||||
|
'description': 'Legacy XSS filter (deprecated in modern browsers, CSP is preferred).',
|
||||||
|
'remediation': 'While deprecated, you can add: X-XSS-Protection: 1; mode=block',
|
||||||
|
'check_value': lambda v: '1' in v,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
# CORS security checks
|
||||||
|
CORS_CHECKS = {
|
||||||
|
'permissive_origin': {
|
||||||
|
'severity': 'high',
|
||||||
|
'title': 'Overly permissive CORS (Access-Control-Allow-Origin: *)',
|
||||||
|
'description': (
|
||||||
|
'The server allows requests from any origin. This can expose sensitive data '
|
||||||
|
'to malicious websites if combined with credentials.'
|
||||||
|
),
|
||||||
|
'remediation': (
|
||||||
|
'Restrict Access-Control-Allow-Origin to specific trusted domains instead of using *. '
|
||||||
|
'Never use * with Access-Control-Allow-Credentials: true.'
|
||||||
|
),
|
||||||
|
},
|
||||||
|
'credentials_with_wildcard': {
|
||||||
|
'severity': 'critical',
|
||||||
|
'title': 'CORS allows credentials with wildcard origin',
|
||||||
|
'description': (
|
||||||
|
'The server has Access-Control-Allow-Credentials: true with Access-Control-Allow-Origin: *. '
|
||||||
|
'This is a severe misconfiguration that can allow credential theft.'
|
||||||
|
),
|
||||||
|
'remediation': (
|
||||||
|
'Never combine Access-Control-Allow-Credentials: true with a wildcard origin. '
|
||||||
|
'Implement a whitelist of allowed origins.'
|
||||||
|
),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class HeaderScanner(BaseScanner):
|
||||||
|
"""
|
||||||
|
Scanner for HTTP security headers.
|
||||||
|
|
||||||
|
Checks for:
|
||||||
|
- Missing security headers
|
||||||
|
- Improperly configured headers
|
||||||
|
- CORS misconfigurations
|
||||||
|
- Cookie security flags
|
||||||
|
"""
|
||||||
|
|
||||||
|
name = "header_check"
|
||||||
|
|
||||||
|
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||||
|
super().__init__(config)
|
||||||
|
self.timeout = self.config.get('timeout', 30)
|
||||||
|
|
||||||
|
def run(self, url: str) -> ScannerResult:
|
||||||
|
"""
|
||||||
|
Run header security analysis on the URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to analyze
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ScannerResult with header findings
|
||||||
|
"""
|
||||||
|
self.logger.info(f"Starting header scan for {url}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Make both GET and HEAD requests
|
||||||
|
headers_data = self._fetch_headers(url)
|
||||||
|
|
||||||
|
issues = []
|
||||||
|
metrics = []
|
||||||
|
|
||||||
|
# Check security headers
|
||||||
|
header_issues, header_score = self._check_security_headers(
|
||||||
|
headers_data['headers']
|
||||||
|
)
|
||||||
|
issues.extend(header_issues)
|
||||||
|
|
||||||
|
# Check CORS configuration
|
||||||
|
cors_issues = self._check_cors(headers_data['headers'], url)
|
||||||
|
issues.extend(cors_issues)
|
||||||
|
|
||||||
|
# Check cookies
|
||||||
|
cookie_issues = self._check_cookies(headers_data, url)
|
||||||
|
issues.extend(cookie_issues)
|
||||||
|
|
||||||
|
# Create metrics
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='security_headers_score',
|
||||||
|
display_name='Security Headers Score',
|
||||||
|
value=float(header_score),
|
||||||
|
unit='percent',
|
||||||
|
source='header_check'
|
||||||
|
))
|
||||||
|
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='headers_missing_count',
|
||||||
|
display_name='Missing Security Headers',
|
||||||
|
value=float(len([i for i in header_issues if 'missing' in i.title.lower()])),
|
||||||
|
unit='count',
|
||||||
|
source='header_check'
|
||||||
|
))
|
||||||
|
|
||||||
|
self.logger.info(
|
||||||
|
f"Header scan complete: {len(issues)} issues, score: {header_score}"
|
||||||
|
)
|
||||||
|
|
||||||
|
return ScannerResult(
|
||||||
|
scanner_name=self.name,
|
||||||
|
status=ScannerStatus.SUCCESS,
|
||||||
|
issues=issues,
|
||||||
|
metrics=metrics,
|
||||||
|
raw_data=headers_data
|
||||||
|
)
|
||||||
|
|
||||||
|
except httpx.TimeoutException:
|
||||||
|
return self._create_error_result(Exception("Header check timed out"))
|
||||||
|
except Exception as e:
|
||||||
|
return self._create_error_result(e)
|
||||||
|
|
||||||
|
def _fetch_headers(self, url: str) -> Dict[str, Any]:
|
||||||
|
"""Fetch headers from the URL."""
|
||||||
|
with httpx.Client(
|
||||||
|
timeout=self.timeout,
|
||||||
|
follow_redirects=True,
|
||||||
|
verify=True
|
||||||
|
) as client:
|
||||||
|
# GET request
|
||||||
|
get_response = client.get(url)
|
||||||
|
|
||||||
|
# HEAD request
|
||||||
|
head_response = client.head(url)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'url': str(get_response.url),
|
||||||
|
'status_code': get_response.status_code,
|
||||||
|
'headers': dict(get_response.headers),
# Preserve every Set-Cookie value; casting headers to a dict collapses duplicates.
'set_cookie_headers': get_response.headers.get_list('set-cookie'),
|
||||||
|
'head_headers': dict(head_response.headers),
|
||||||
|
'redirected': str(get_response.url) != url,
|
||||||
|
'redirect_history': [str(r.url) for r in get_response.history],
|
||||||
|
}
|
||||||
|
|
||||||
|
def _check_security_headers(
|
||||||
|
self,
|
||||||
|
headers: Dict[str, str]
|
||||||
|
) -> Tuple[List[IssueData], int]:
|
||||||
|
"""
|
||||||
|
Check for security headers.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (list of issues, security score 0-100)
|
||||||
|
"""
|
||||||
|
issues = []
|
||||||
|
score = 100
|
||||||
|
headers_lower = {k.lower(): v for k, v in headers.items()}
|
||||||
|
|
||||||
|
for header_name, config in SECURITY_HEADERS.items():
|
||||||
|
header_key = header_name.lower()
|
||||||
|
|
||||||
|
if header_key not in headers_lower:
|
||||||
|
# Missing header
|
||||||
|
severity = config['severity']
|
||||||
|
deduction = {'critical': 20, 'high': 15, 'medium': 10, 'low': 5, 'info': 2}
|
||||||
|
score -= deduction.get(severity, 5)
|
||||||
|
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='headers',
|
||||||
|
severity=severity,
|
||||||
|
title=f'Missing security header: {header_name}',
|
||||||
|
description=config['description'],
|
||||||
|
tool='header_check',
|
||||||
|
remediation=config['remediation'],
|
||||||
|
raw_data={'header': header_name, 'status': 'missing'}
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
# Header present, check value
|
||||||
|
value = headers_lower[header_key]
|
||||||
|
check_func = config.get('check_value')
|
||||||
|
|
||||||
|
if check_func and not check_func(value):
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='headers',
|
||||||
|
severity='low',
|
||||||
|
title=f'Weak configuration: {header_name}',
|
||||||
|
description=(
|
||||||
|
f"{config['description']} "
|
||||||
|
f"Current value may not provide optimal protection: {value}"
|
||||||
|
),
|
||||||
|
tool='header_check',
|
||||||
|
remediation=config['remediation'],
|
||||||
|
raw_data={'header': header_name, 'value': value, 'status': 'weak'}
|
||||||
|
))
|
||||||
|
score -= 3
|
||||||
|
|
||||||
|
return issues, max(0, score)
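# Worked example of the scoring above (hypothetical response): missing HSTS
# (high, -15), missing CSP (high, -15) and missing X-Frame-Options
# (medium, -10), plus one weakly configured header (-3), gives 100 - 43 = 57.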
|
||||||
|
|
||||||
|
def _check_cors(self, headers: Dict[str, str], url: str) -> List[IssueData]:
|
||||||
|
"""Check CORS configuration for issues."""
|
||||||
|
issues = []
|
||||||
|
headers_lower = {k.lower(): v for k, v in headers.items()}
|
||||||
|
|
||||||
|
acao = headers_lower.get('access-control-allow-origin', '')
|
||||||
|
acac = headers_lower.get('access-control-allow-credentials', '')
|
||||||
|
|
||||||
|
if acao == '*':
|
||||||
|
if acac.lower() == 'true':
|
||||||
|
# Critical: credentials with wildcard
|
||||||
|
check = CORS_CHECKS['credentials_with_wildcard']
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='cors',
|
||||||
|
severity=check['severity'],
|
||||||
|
title=check['title'],
|
||||||
|
description=check['description'],
|
||||||
|
tool='header_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=check['remediation'],
|
||||||
|
raw_data={
|
||||||
|
'Access-Control-Allow-Origin': acao,
|
||||||
|
'Access-Control-Allow-Credentials': acac
|
||||||
|
}
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
# Warning: permissive origin
|
||||||
|
check = CORS_CHECKS['permissive_origin']
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='cors',
|
||||||
|
severity='medium', # Lower severity without credentials
|
||||||
|
title=check['title'],
|
||||||
|
description=check['description'],
|
||||||
|
tool='header_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=check['remediation'],
|
||||||
|
raw_data={'Access-Control-Allow-Origin': acao}
|
||||||
|
))
|
||||||
|
|
||||||
|
return issues
|
||||||
|
|
||||||
|
def _check_cookies(self, headers_data: Dict[str, Any], url: str) -> List[IssueData]:
|
||||||
|
"""Check Set-Cookie headers for security flags."""
|
||||||
|
issues = []
|
||||||
|
# Use the full Set-Cookie list captured in _fetch_headers; reading it from
# a plain header dict would merge multiple Set-Cookie values into one string.
set_cookies = headers_data.get('set_cookie_headers', [])
|
||||||
|
|
||||||
|
is_https = url.startswith('https://')
|
||||||
|
|
||||||
|
for cookie in set_cookies:
|
||||||
|
cookie_lower = cookie.lower()
|
||||||
|
cookie_name = cookie.split('=')[0] if '=' in cookie else 'unknown'
|
||||||
|
|
||||||
|
cookie_issues = []
|
||||||
|
|
||||||
|
# Check Secure flag on HTTPS
|
||||||
|
if is_https and 'secure' not in cookie_lower:
|
||||||
|
cookie_issues.append({
|
||||||
|
'flag': 'Secure',
|
||||||
|
'description': (
|
||||||
|
'Cookie is set without Secure flag on HTTPS site. '
|
||||||
|
'This allows the cookie to be sent over unencrypted connections.'
|
||||||
|
),
|
||||||
|
'severity': 'high'
|
||||||
|
})
|
||||||
|
|
||||||
|
# Check HttpOnly flag (important for session cookies)
|
||||||
|
if 'httponly' not in cookie_lower:
|
||||||
|
# Check if it might be a session cookie
|
||||||
|
if any(term in cookie_name.lower() for term in ['session', 'auth', 'token', 'user']):
|
||||||
|
cookie_issues.append({
|
||||||
|
'flag': 'HttpOnly',
|
||||||
|
'description': (
|
||||||
|
'Session-like cookie is set without HttpOnly flag. '
|
||||||
|
'This allows JavaScript access, increasing XSS risk.'
|
||||||
|
),
|
||||||
|
'severity': 'high'
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
cookie_issues.append({
|
||||||
|
'flag': 'HttpOnly',
|
||||||
|
'description': (
|
||||||
|
'Cookie is set without HttpOnly flag. '
|
||||||
|
'Consider adding it unless JavaScript needs access.'
|
||||||
|
),
|
||||||
|
'severity': 'low'
|
||||||
|
})
|
||||||
|
|
||||||
|
# Check SameSite attribute
|
||||||
|
if 'samesite' not in cookie_lower:
|
||||||
|
cookie_issues.append({
|
||||||
|
'flag': 'SameSite',
|
||||||
|
'description': (
|
||||||
|
'Cookie is set without SameSite attribute. '
|
||||||
|
'This can enable CSRF attacks in some scenarios.'
|
||||||
|
),
|
||||||
|
'severity': 'medium'
|
||||||
|
})
|
||||||
|
elif 'samesite=none' in cookie_lower and 'secure' not in cookie_lower:
|
||||||
|
cookie_issues.append({
|
||||||
|
'flag': 'SameSite=None without Secure',
|
||||||
|
'description': (
|
||||||
|
'Cookie has SameSite=None but no Secure flag. '
|
||||||
|
'Modern browsers will reject this cookie.'
|
||||||
|
),
|
||||||
|
'severity': 'medium'
|
||||||
|
})
|
||||||
|
|
||||||
|
# Create issues for this cookie
|
||||||
|
for ci in cookie_issues:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='security',
|
||||||
|
severity=ci['severity'],
|
||||||
|
title=f"Cookie '{cookie_name}' missing {ci['flag']} flag",
|
||||||
|
description=ci['description'],
|
||||||
|
tool='header_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
f"Add the {ci['flag']} flag to the Set-Cookie header. "
|
||||||
|
f"Example: Set-Cookie: {cookie_name}=value; Secure; HttpOnly; SameSite=Strict"
|
||||||
|
),
|
||||||
|
raw_data={'cookie': cookie[:200]} # Truncate for storage
|
||||||
|
))
|
||||||
|
|
||||||
|
return issues
|
||||||
|
|
@ -0,0 +1,323 @@
|
||||||
|
"""
|
||||||
|
Lighthouse Scanner Integration.
|
||||||
|
|
||||||
|
This module integrates with Google Lighthouse to measure
|
||||||
|
performance, accessibility, SEO, and best practices.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from .base import (
|
||||||
|
BaseScanner,
|
||||||
|
ScannerResult,
|
||||||
|
ScannerStatus,
|
||||||
|
IssueData,
|
||||||
|
MetricData,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class LighthouseScanner(BaseScanner):
|
||||||
|
"""
|
||||||
|
Scanner that uses Google Lighthouse for performance analysis.
|
||||||
|
|
||||||
|
Communicates with the Lighthouse service container via HTTP API.
|
||||||
|
Collects performance metrics, Core Web Vitals, and various audits.
|
||||||
|
"""
|
||||||
|
|
||||||
|
name = "lighthouse"
|
||||||
|
|
||||||
|
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||||
|
super().__init__(config)
|
||||||
|
self.service_url = self.config.get(
|
||||||
|
'service_url',
|
||||||
|
'http://lighthouse:3001'
|
||||||
|
)
|
||||||
|
self.timeout = self.config.get('timeout', 120)
|
||||||
|
|
||||||
|
def is_available(self) -> bool:
|
||||||
|
"""Check if Lighthouse service is available."""
|
||||||
|
try:
|
||||||
|
with httpx.Client(timeout=5) as client:
|
||||||
|
response = client.get(f"{self.service_url}/health")
|
||||||
|
return response.status_code == 200
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Lighthouse service not available: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def run(self, url: str) -> ScannerResult:
|
||||||
|
"""
|
||||||
|
Run Lighthouse scan against the URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to analyze
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ScannerResult with performance metrics and issues
|
||||||
|
"""
|
||||||
|
self.logger.info(f"Starting Lighthouse scan for {url}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
with httpx.Client(timeout=self.timeout) as client:
|
||||||
|
response = client.post(
|
||||||
|
f"{self.service_url}/scan",
|
||||||
|
json={"url": url}
|
||||||
|
)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
|
return self._parse_results(url, data)
|
||||||
|
|
||||||
|
except httpx.TimeoutException:
|
||||||
|
return self._create_error_result(
|
||||||
|
Exception("Lighthouse scan timed out")
|
||||||
|
)
|
||||||
|
except httpx.HTTPStatusError as e:
|
||||||
|
return self._create_error_result(
|
||||||
|
Exception(f"Lighthouse service error: {e.response.status_code}")
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
return self._create_error_result(e)
|
||||||
|
|
||||||
|
def _parse_results(self, url: str, data: Dict[str, Any]) -> ScannerResult:
|
||||||
|
"""
|
||||||
|
Parse Lighthouse results into ScannerResult format.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The scanned URL
|
||||||
|
data: Raw Lighthouse response data
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Parsed ScannerResult
|
||||||
|
"""
|
||||||
|
issues = []
|
||||||
|
metrics = []
|
||||||
|
|
||||||
|
# Extract scores
|
||||||
|
scores = data.get('scores', {})
|
||||||
|
|
||||||
|
# Extract and create metrics
|
||||||
|
raw_metrics = data.get('metrics', {})
|
||||||
|
|
||||||
|
# Core Web Vitals
|
||||||
|
metric_mappings = [
|
||||||
|
('firstContentfulPaint', 'First Contentful Paint', 'ms'),
|
||||||
|
('largestContentfulPaint', 'Largest Contentful Paint', 'ms'),
|
||||||
|
('speedIndex', 'Speed Index', 'ms'),
|
||||||
|
('timeToInteractive', 'Time to Interactive', 'ms'),
|
||||||
|
('totalBlockingTime', 'Total Blocking Time', 'ms'),
|
||||||
|
('cumulativeLayoutShift', 'Cumulative Layout Shift', 'score'),
|
||||||
|
]
|
||||||
|
|
||||||
|
for key, display_name, unit in metric_mappings:
|
||||||
|
metric_data = raw_metrics.get(key, {})
|
||||||
|
if metric_data and metric_data.get('value') is not None:
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name=self._to_snake_case(key),
|
||||||
|
display_name=display_name,
|
||||||
|
value=metric_data['value'],
|
||||||
|
unit=unit,
|
||||||
|
source='lighthouse',
|
||||||
|
score=metric_data.get('score')
|
||||||
|
))
|
||||||
|
|
||||||
|
# Resource metrics
|
||||||
|
resources = data.get('resources', {})
|
||||||
|
diagnostics = data.get('diagnostics', {})
|
||||||
|
|
||||||
|
if resources.get('totalByteWeight'):
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='total_byte_weight',
|
||||||
|
display_name='Total Page Weight',
|
||||||
|
value=resources['totalByteWeight'],
|
||||||
|
unit='bytes',
|
||||||
|
source='lighthouse'
|
||||||
|
))
|
||||||
|
|
||||||
|
if resources.get('bootupTime'):
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='javascript_bootup_time',
|
||||||
|
display_name='JavaScript Boot-up Time',
|
||||||
|
value=resources['bootupTime'],
|
||||||
|
unit='ms',
|
||||||
|
source='lighthouse'
|
||||||
|
))
|
||||||
|
|
||||||
|
if diagnostics.get('numRequests'):
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='total_requests',
|
||||||
|
display_name='Total Network Requests',
|
||||||
|
value=float(diagnostics['numRequests']),
|
||||||
|
unit='count',
|
||||||
|
source='lighthouse'
|
||||||
|
))
|
||||||
|
|
||||||
|
# Extract issues from failed audits
|
||||||
|
raw_issues = data.get('issues', [])
|
||||||
|
for issue in raw_issues:
|
||||||
|
severity = self._score_to_severity(issue.get('score', 0.5))
|
||||||
|
category = self._map_category(issue.get('category', 'performance'))
|
||||||
|
|
||||||
|
issues.append(IssueData(
|
||||||
|
category=category,
|
||||||
|
severity=severity,
|
||||||
|
title=issue.get('title', 'Unknown issue'),
|
||||||
|
description=issue.get('description', ''),
|
||||||
|
tool='lighthouse',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=self._get_remediation(issue.get('id')),
|
||||||
|
raw_data=issue
|
||||||
|
))
|
||||||
|
|
||||||
|
# Check for large bundles
|
||||||
|
large_scripts = resources.get('scriptTreemap', [])
|
||||||
|
for script in large_scripts[:5]: # Top 5 largest
|
||||||
|
if script.get('resourceBytes', 0) > settings.SCANNER_CONFIG.get(
|
||||||
|
'LARGE_JS_BUNDLE_THRESHOLD_BYTES', 500 * 1024
|
||||||
|
):
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='resources',
|
||||||
|
severity='medium',
|
||||||
|
title=f"Large JavaScript bundle detected",
|
||||||
|
description=(
|
||||||
|
f"The script '{script.get('name', 'Unknown')}' "
|
||||||
|
f"is {script['resourceBytes'] / 1024:.1f} KB. "
|
||||||
|
"Large bundles can slow down page load and increase memory usage."
|
||||||
|
),
|
||||||
|
tool='lighthouse',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
"Consider code splitting, tree shaking, or lazy loading "
|
||||||
|
"to reduce bundle size."
|
||||||
|
),
|
||||||
|
raw_data=script
|
||||||
|
))
|
||||||
|
|
||||||
|
# Check for unused JavaScript
|
||||||
|
unused_js = resources.get('unusedJavascript', [])
|
||||||
|
if unused_js:
|
||||||
|
total_wasted = sum(u.get('wastedBytes', 0) for u in unused_js)
|
||||||
|
if total_wasted > 100 * 1024: # More than 100KB unused
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='performance',
|
||||||
|
severity='medium',
|
||||||
|
title="Significant unused JavaScript detected",
|
||||||
|
description=(
|
||||||
|
f"Found {total_wasted / 1024:.1f} KB of unused JavaScript "
|
||||||
|
f"across {len(unused_js)} resources. This increases page "
|
||||||
|
"load time and memory usage."
|
||||||
|
),
|
||||||
|
tool='lighthouse',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
"Remove unused code or use code splitting to load "
|
||||||
|
"JavaScript only when needed."
|
||||||
|
),
|
||||||
|
raw_data={'unused_resources': unused_js}
|
||||||
|
))
|
||||||
|
|
||||||
|
# Check for render-blocking resources
|
||||||
|
blocking = resources.get('renderBlockingResources', [])
|
||||||
|
if blocking:
|
||||||
|
total_wasted_ms = sum(r.get('wastedMs', 0) for r in blocking)
|
||||||
|
if total_wasted_ms > 500:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='performance',
|
||||||
|
severity='medium',
|
||||||
|
title="Render-blocking resources detected",
|
||||||
|
description=(
|
||||||
|
f"Found {len(blocking)} render-blocking resources "
|
||||||
|
f"adding approximately {total_wasted_ms:.0f}ms to page load. "
|
||||||
|
"These resources delay first paint."
|
||||||
|
),
|
||||||
|
tool='lighthouse',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
"Consider inlining critical CSS, deferring non-critical JS, "
|
||||||
|
"or using async/defer attributes."
|
||||||
|
),
|
||||||
|
raw_data={'blocking_resources': blocking}
|
||||||
|
))
|
||||||
|
|
||||||
|
self.logger.info(
|
||||||
|
f"Lighthouse scan complete: {len(issues)} issues, {len(metrics)} metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
return ScannerResult(
|
||||||
|
scanner_name=self.name,
|
||||||
|
status=ScannerStatus.SUCCESS,
|
||||||
|
issues=issues,
|
||||||
|
metrics=metrics,
|
||||||
|
scores={
|
||||||
|
'performance': scores.get('performance', 0),
|
||||||
|
'accessibility': scores.get('accessibility', 0),
|
||||||
|
'best_practices': scores.get('bestPractices', 0),
|
||||||
|
'seo': scores.get('seo', 0),
|
||||||
|
},
|
||||||
|
raw_data=data
|
||||||
|
)
|
||||||
|
|
||||||
|
def _to_snake_case(self, name: str) -> str:
|
||||||
|
"""Convert camelCase to snake_case."""
|
||||||
|
import re
|
||||||
|
s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
|
||||||
|
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
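# e.g. 'largestContentfulPaint' -> 'largest_contentful_paint',
#      'cumulativeLayoutShift'  -> 'cumulative_layout_shift'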
|
||||||
|
|
||||||
|
def _score_to_severity(self, score: float) -> str:
|
||||||
|
"""Convert Lighthouse score to severity level."""
|
||||||
|
if score is None:
|
||||||
|
return 'info'
|
||||||
|
elif score < 0.25:
|
||||||
|
return 'high'
|
||||||
|
elif score < 0.5:
|
||||||
|
return 'medium'
|
||||||
|
elif score < 0.75:
|
||||||
|
return 'low'
|
||||||
|
else:
|
||||||
|
return 'info'
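# e.g. an audit score of 0.2 maps to 'high', 0.4 to 'medium', 0.6 to 'low',
# and anything from 0.75 upwards (or a missing score) to 'info'.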
|
||||||
|
|
||||||
|
def _map_category(self, lighthouse_category: str) -> str:
|
||||||
|
"""Map Lighthouse category to our category."""
|
||||||
|
mapping = {
|
||||||
|
'performance': 'performance',
|
||||||
|
'accessibility': 'accessibility',
|
||||||
|
'best-practices': 'best_practices',
|
||||||
|
'seo': 'seo',
|
||||||
|
}
|
||||||
|
return mapping.get(lighthouse_category, 'performance')
|
||||||
|
|
||||||
|
def _get_remediation(self, audit_id: str) -> str:
|
||||||
|
"""Get remediation text for known audit IDs."""
|
||||||
|
remediations = {
|
||||||
|
'first-contentful-paint': (
|
||||||
|
"Reduce server response time, eliminate render-blocking resources, "
|
||||||
|
"and optimize critical rendering path."
|
||||||
|
),
|
||||||
|
'largest-contentful-paint': (
|
||||||
|
"Optimize images, preload critical resources, and reduce server "
|
||||||
|
"response time."
|
||||||
|
),
|
||||||
|
'total-blocking-time': (
|
||||||
|
"Reduce JavaScript execution time by breaking up long tasks, "
|
||||||
|
"removing unused code, and minimizing main thread work."
|
||||||
|
),
|
||||||
|
'cumulative-layout-shift': (
|
||||||
|
"Always include size attributes on images and videos, reserve space "
|
||||||
|
"for ad slots, and avoid inserting content above existing content."
|
||||||
|
),
|
||||||
|
'speed-index': (
|
||||||
|
"Minimize main thread work, reduce JavaScript execution time, "
|
||||||
|
"and ensure text remains visible during webfont load."
|
||||||
|
),
|
||||||
|
'interactive': (
|
||||||
|
"Reduce JavaScript payload, defer non-critical scripts, and "
|
||||||
|
"minimize main thread work."
|
||||||
|
),
|
||||||
|
}
|
||||||
|
return remediations.get(audit_id, "Review and optimize based on the audit details.")
|
||||||
|
|
@ -0,0 +1,397 @@
|
||||||
|
"""
|
||||||
|
Playwright Scanner Integration.
|
||||||
|
|
||||||
|
This module uses Playwright to perform browser-based analysis,
|
||||||
|
capturing console errors, network requests, and resource metrics.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from .base import (
|
||||||
|
BaseScanner,
|
||||||
|
ScannerResult,
|
||||||
|
ScannerStatus,
|
||||||
|
IssueData,
|
||||||
|
MetricData,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class PlaywrightScanner(BaseScanner):
|
||||||
|
"""
|
||||||
|
Scanner using Playwright for browser-based analysis.
|
||||||
|
|
||||||
|
Captures:
|
||||||
|
- Console errors and warnings
|
||||||
|
- Network request details
|
||||||
|
- Page load timing
|
||||||
|
- Large resources (images, scripts)
|
||||||
|
- Memory usage indicators
|
||||||
|
"""
|
||||||
|
|
||||||
|
name = "playwright"
|
||||||
|
|
||||||
|
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||||
|
super().__init__(config)
|
||||||
|
self.timeout = self.config.get('timeout', 30000) # 30 seconds
|
||||||
|
self.viewport = self.config.get('viewport', {'width': 1920, 'height': 1080})
|
||||||
|
|
||||||
|
def run(self, url: str) -> ScannerResult:
|
||||||
|
"""
|
||||||
|
Run Playwright analysis on the URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to analyze
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ScannerResult with browser analysis data
|
||||||
|
"""
|
||||||
|
self.logger.info(f"Starting Playwright scan for {url}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Run async scan in sync context
|
||||||
|
loop = asyncio.new_event_loop()
|
||||||
|
asyncio.set_event_loop(loop)
|
||||||
|
try:
|
||||||
|
result = loop.run_until_complete(self._async_scan(url))
|
||||||
|
finally:
|
||||||
|
loop.close()
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
return self._create_error_result(e)
|
||||||
|
|
||||||
|
async def _async_scan(self, url: str) -> ScannerResult:
|
||||||
|
"""
|
||||||
|
Async implementation of the scan.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to analyze
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ScannerResult with findings
|
||||||
|
"""
|
||||||
|
from playwright.async_api import async_playwright
|
||||||
|
|
||||||
|
issues = []
|
||||||
|
metrics = []
|
||||||
|
raw_data = {
|
||||||
|
'console_messages': [],
|
||||||
|
'network_requests': [],
|
||||||
|
'failed_requests': [],
|
||||||
|
'large_resources': [],
|
||||||
|
}
|
||||||
|
|
||||||
|
async with async_playwright() as p:
|
||||||
|
browser = await p.chromium.launch(
|
||||||
|
headless=True,
|
||||||
|
args=[
|
||||||
|
'--no-sandbox',
|
||||||
|
'--disable-setuid-sandbox',
|
||||||
|
'--disable-dev-shm-usage',
|
||||||
|
'--disable-gpu',
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
context = await browser.new_context(
|
||||||
|
viewport=self.viewport,
|
||||||
|
user_agent=(
|
||||||
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
|
||||||
|
'(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
page = await context.new_page()
|
||||||
|
|
||||||
|
# Collect data
|
||||||
|
console_messages = []
|
||||||
|
network_requests = []
|
||||||
|
failed_requests = []
|
||||||
|
|
||||||
|
# Set up event listeners
|
||||||
|
page.on("console", lambda msg: console_messages.append({
|
||||||
|
'type': msg.type,
|
||||||
|
'text': msg.text,
|
||||||
|
'location': str(msg.location) if msg.location else None,
|
||||||
|
}))
|
||||||
|
|
||||||
|
page.on("request", lambda req: network_requests.append({
|
||||||
|
'url': req.url,
|
||||||
|
'method': req.method,
|
||||||
|
'resource_type': req.resource_type,
|
||||||
|
'timestamp': time.time(),
|
||||||
|
}))
|
||||||
|
|
||||||
|
page.on("requestfailed", lambda req: failed_requests.append({
|
||||||
|
'url': req.url,
|
||||||
|
'failure': req.failure,
|
||||||
|
'resource_type': req.resource_type,
|
||||||
|
}))
|
||||||
|
|
||||||
|
# Navigate and measure
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = await page.goto(
|
||||||
|
url,
|
||||||
|
wait_until='networkidle',
|
||||||
|
timeout=self.timeout
|
||||||
|
)
|
||||||
|
load_time = (time.time() - start_time) * 1000 # Convert to ms
|
||||||
|
|
||||||
|
# Get response status
|
||||||
|
status_code = response.status if response else 0
|
||||||
|
|
||||||
|
# Wait a bit more for any delayed scripts
|
||||||
|
await page.wait_for_timeout(2000)
|
||||||
|
|
||||||
|
# Get performance timing
|
||||||
|
perf_timing = await page.evaluate('''() => {
|
||||||
|
const timing = performance.timing;
|
||||||
|
const navigation = performance.getEntriesByType("navigation")[0];
|
||||||
|
return {
|
||||||
|
domContentLoaded: timing.domContentLoadedEventEnd - timing.navigationStart,
|
||||||
|
domComplete: timing.domComplete - timing.navigationStart,
|
||||||
|
loadEvent: timing.loadEventEnd - timing.navigationStart,
|
||||||
|
firstPaint: (performance.getEntriesByType("paint").find(e => e.name === "first-paint") || {}).startTime || null,
|
||||||
|
transferSize: navigation ? navigation.transferSize : null,
|
||||||
|
};
|
||||||
|
}''')
|
||||||
|
|
||||||
|
# Get memory info (if available)
|
||||||
|
memory_info = await page.evaluate('''() => {
|
||||||
|
if (performance.memory) {
|
||||||
|
return {
|
||||||
|
usedJSHeapSize: performance.memory.usedJSHeapSize,
|
||||||
|
totalJSHeapSize: performance.memory.totalJSHeapSize,
|
||||||
|
jsHeapSizeLimit: performance.memory.jsHeapSizeLimit,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}''')
|
||||||
|
|
||||||
|
# Get resource sizes
|
||||||
|
resources = await page.evaluate('''() => {
|
||||||
|
const entries = performance.getEntriesByType("resource");
|
||||||
|
return entries.map(e => ({
|
||||||
|
name: e.name,
|
||||||
|
type: e.initiatorType,
|
||||||
|
transferSize: e.transferSize,
|
||||||
|
duration: e.duration,
|
||||||
|
}));
|
||||||
|
}''')
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Page navigation error: {e}")
|
||||||
|
load_time = self.timeout
|
||||||
|
status_code = 0
|
||||||
|
perf_timing = {}
|
||||||
|
memory_info = None
|
||||||
|
resources = []
|
||||||
|
|
||||||
|
await browser.close()
|
||||||
|
|
||||||
|
# Process collected data
|
||||||
|
raw_data['console_messages'] = console_messages
|
||||||
|
raw_data['network_requests'] = network_requests[:100] # Limit stored
|
||||||
|
raw_data['failed_requests'] = failed_requests
|
||||||
|
raw_data['performance_timing'] = perf_timing
raw_data['memory_info'] = memory_info
raw_data['status_code'] = status_code
|
||||||
|
|
||||||
|
# Create metrics
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='page_load_time',
|
||||||
|
display_name='Page Load Time',
|
||||||
|
value=load_time,
|
||||||
|
unit='ms',
|
||||||
|
source='playwright'
|
||||||
|
))
|
||||||
|
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='total_network_requests',
|
||||||
|
display_name='Total Network Requests',
|
||||||
|
value=float(len(network_requests)),
|
||||||
|
unit='count',
|
||||||
|
source='playwright'
|
||||||
|
))
|
||||||
|
|
||||||
|
# Calculate total transfer size
|
||||||
|
total_transfer = sum(r.get('transferSize', 0) for r in resources if r.get('transferSize'))
|
||||||
|
if total_transfer > 0:
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='total_transfer_size',
|
||||||
|
display_name='Total Transfer Size',
|
||||||
|
value=float(total_transfer),
|
||||||
|
unit='bytes',
|
||||||
|
source='playwright'
|
||||||
|
))
|
||||||
|
|
||||||
|
if perf_timing.get('domContentLoaded'):
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='dom_content_loaded',
|
||||||
|
display_name='DOM Content Loaded',
|
||||||
|
value=float(perf_timing['domContentLoaded']),
|
||||||
|
unit='ms',
|
||||||
|
source='playwright'
|
||||||
|
))
|
||||||
|
|
||||||
|
# Memory metrics
|
||||||
|
if memory_info:
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='js_heap_used',
|
||||||
|
display_name='JS Heap Used',
|
||||||
|
value=float(memory_info.get('usedJSHeapSize', 0)),
|
||||||
|
unit='bytes',
|
||||||
|
source='playwright'
|
||||||
|
))
|
||||||
|
|
||||||
|
# Check for high memory usage
|
||||||
|
heap_used = memory_info.get('usedJSHeapSize', 0)
|
||||||
|
heap_limit = memory_info.get('jsHeapSizeLimit', 1)
|
||||||
|
heap_percent = (heap_used / heap_limit) * 100 if heap_limit > 0 else 0
|
||||||
|
|
||||||
|
if heap_percent > 50:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='resources',
|
||||||
|
severity='medium',
|
||||||
|
title='High JavaScript memory usage',
|
||||||
|
description=(
|
||||||
|
f'JavaScript is using {heap_used / (1024*1024):.1f} MB '
|
||||||
|
f'({heap_percent:.1f}% of available heap). '
|
||||||
|
'This may indicate memory-heavy operations or potential leaks.'
|
||||||
|
),
|
||||||
|
tool='playwright',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
'Review JavaScript for memory leaks, optimize data structures, '
|
||||||
|
'and ensure proper cleanup of event listeners and timers.'
|
||||||
|
),
|
||||||
|
raw_data=memory_info
|
||||||
|
))
|
||||||
|
|
||||||
|
# Analyze console messages for errors
|
||||||
|
errors = [m for m in console_messages if m['type'] == 'error']
|
||||||
|
warnings = [m for m in console_messages if m['type'] == 'warning']
|
||||||
|
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='console_errors_count',
|
||||||
|
display_name='Console Errors',
|
||||||
|
value=float(len(errors)),
|
||||||
|
unit='count',
|
||||||
|
source='playwright'
|
||||||
|
))
|
||||||
|
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='console_warnings_count',
|
||||||
|
display_name='Console Warnings',
|
||||||
|
value=float(len(warnings)),
|
||||||
|
unit='count',
|
||||||
|
source='playwright'
|
||||||
|
))
|
||||||
|
|
||||||
|
# Create issues for console errors
|
||||||
|
if errors:
|
||||||
|
# Group similar errors
|
||||||
|
error_texts = set(e['text'][:200] for e in errors)
|
||||||
|
for error_text in list(error_texts)[:10]: # Limit to 10 unique errors
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='content',
|
||||||
|
severity='medium',
|
||||||
|
title='JavaScript console error',
|
||||||
|
description=f'JavaScript error logged to console: {error_text}',
|
||||||
|
tool='playwright',
|
||||||
|
affected_url=url,
|
||||||
|
remediation='Review and fix the JavaScript error in your code.',
|
||||||
|
raw_data={'error': error_text}
|
||||||
|
))
|
||||||
|
|
||||||
|
# Check for failed network requests
|
||||||
|
if failed_requests:
|
||||||
|
for req in failed_requests[:5]: # Limit reported
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='content',
|
||||||
|
severity='low',
|
||||||
|
title='Failed network request',
|
||||||
|
description=(
|
||||||
|
f"Request to {req['url'][:100]} failed: {req.get('failure', 'Unknown error')}"
|
||||||
|
),
|
||||||
|
tool='playwright',
|
||||||
|
affected_url=req['url'],
|
||||||
|
remediation='Ensure the resource is available and CORS is configured correctly.',
|
||||||
|
raw_data=req
|
||||||
|
))
|
||||||
|
|
||||||
|
# Find large resources
|
||||||
|
large_threshold = settings.SCANNER_CONFIG.get('LARGE_IMAGE_THRESHOLD_BYTES', 1024 * 1024)
|
||||||
|
large_resources = [
|
||||||
|
r for r in resources
|
||||||
|
if r.get('transferSize', 0) > large_threshold
|
||||||
|
]
|
||||||
|
|
||||||
|
for resource in large_resources[:5]: # Limit reported
|
||||||
|
size_mb = resource['transferSize'] / (1024 * 1024)
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='resources',
|
||||||
|
severity='medium' if size_mb > 2 else 'low',
|
||||||
|
title=f"Large resource detected ({size_mb:.1f} MB)",
|
||||||
|
description=(
|
||||||
|
f"The resource '{resource['name'][-80:]}' is {size_mb:.2f} MB. "
|
||||||
|
"Large resources increase page load time and bandwidth usage."
|
||||||
|
),
|
||||||
|
tool='playwright',
|
||||||
|
affected_url=resource['name'],
|
||||||
|
remediation=(
|
||||||
|
'Optimize images using compression, use appropriate formats (WebP, AVIF), '
|
||||||
|
'implement lazy loading, or consider a CDN.'
|
||||||
|
),
|
||||||
|
raw_data=resource
|
||||||
|
))
|
||||||
|
|
||||||
|
raw_data['large_resources'] = large_resources
|
||||||
|
|
||||||
|
# Count resources by type
|
||||||
|
resource_counts = {}
|
||||||
|
for req in network_requests:
|
||||||
|
rtype = req.get('resource_type', 'other')
|
||||||
|
resource_counts[rtype] = resource_counts.get(rtype, 0) + 1
|
||||||
|
|
||||||
|
raw_data['resource_counts'] = resource_counts
|
||||||
|
|
||||||
|
# Check for excessive requests
|
||||||
|
if len(network_requests) > 100:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='performance',
|
||||||
|
severity='medium',
|
||||||
|
title='High number of network requests',
|
||||||
|
description=(
|
||||||
|
f'Page made {len(network_requests)} network requests. '
|
||||||
|
'Excessive requests increase page load time and server load.'
|
||||||
|
),
|
||||||
|
tool='playwright',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
'Consolidate resources, use HTTP/2 multiplexing, implement '
|
||||||
|
'resource bundling, and lazy load non-critical resources.'
|
||||||
|
),
|
||||||
|
raw_data=resource_counts
|
||||||
|
))
|
||||||
|
|
||||||
|
self.logger.info(
|
||||||
|
f"Playwright scan complete: {len(issues)} issues, {len(metrics)} metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
return ScannerResult(
|
||||||
|
scanner_name=self.name,
|
||||||
|
status=ScannerStatus.SUCCESS,
|
||||||
|
issues=issues,
|
||||||
|
metrics=metrics,
|
||||||
|
raw_data=raw_data
|
||||||
|
)
|
||||||
|
|
@ -0,0 +1,314 @@
|
||||||
|
"""
|
||||||
|
Scan Runner - Orchestrates multiple scanners.
|
||||||
|
|
||||||
|
This module coordinates running all enabled scanners against a URL
|
||||||
|
and aggregates their results into a unified report.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
from typing import Any, Dict, List, Optional, Type
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from .base import BaseScanner, ScannerResult, ScannerStatus
|
||||||
|
from .lighthouse import LighthouseScanner
|
||||||
|
from .playwright_scanner import PlaywrightScanner
|
||||||
|
from .zap import ZAPScanner
|
||||||
|
from .headers import HeaderScanner
|
||||||
|
from .tls import TLSScanner
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Default scanner classes to run
|
||||||
|
DEFAULT_SCANNERS: List[Type[BaseScanner]] = [
|
||||||
|
LighthouseScanner,
|
||||||
|
PlaywrightScanner,
|
||||||
|
ZAPScanner,
|
||||||
|
HeaderScanner,
|
||||||
|
TLSScanner,
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class ScanRunner:
|
||||||
|
"""
|
||||||
|
Orchestrates running multiple scanners and aggregating results.
|
||||||
|
|
||||||
|
This class manages:
|
||||||
|
- Running enabled scanners in parallel or sequence
|
||||||
|
- Aggregating results from all scanners
|
||||||
|
- Error handling and partial result compilation
|
||||||
|
- Timeout management
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
scanner_classes: Optional[List[Type[BaseScanner]]] = None,
|
||||||
|
config: Optional[Dict[str, Any]] = None,
|
||||||
|
max_workers: int = 3
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Initialize the scan runner.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
scanner_classes: List of scanner classes to use (defaults to all)
|
||||||
|
config: Configuration dict passed to each scanner
|
||||||
|
max_workers: Maximum concurrent scanner threads
|
||||||
|
"""
|
||||||
|
self.scanner_classes = scanner_classes or DEFAULT_SCANNERS
|
||||||
|
self.config = config or {}
|
||||||
|
self.max_workers = max_workers
|
||||||
|
self.logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
def run(self, url: str, parallel: bool = True) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Run all scanners against the URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to scan
|
||||||
|
parallel: Whether to run scanners in parallel
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Aggregated results dictionary containing:
|
||||||
|
- status: Overall scan status
|
||||||
|
- scores: Aggregated scores
|
||||||
|
- issues: All issues from all scanners
|
||||||
|
- metrics: All metrics from all scanners
|
||||||
|
- scanner_results: Individual scanner results
|
||||||
|
- errors: Any scanner errors
|
||||||
|
"""
|
||||||
|
self.logger.info(f"Starting scan runner for {url} with {len(self.scanner_classes)} scanners")
|
||||||
|
|
||||||
|
# Initialize scanners
|
||||||
|
scanners = self._initialize_scanners()
|
||||||
|
|
||||||
|
# Run scanners
|
||||||
|
if parallel:
|
||||||
|
results = self._run_parallel(scanners, url)
|
||||||
|
else:
|
||||||
|
results = self._run_sequential(scanners, url)
|
||||||
|
|
||||||
|
# Aggregate results
|
||||||
|
aggregated = self._aggregate_results(results)
|
||||||
|
|
||||||
|
self.logger.info(
|
||||||
|
f"Scan complete: {len(aggregated['issues'])} issues, "
|
||||||
|
f"{len(aggregated['metrics'])} metrics, "
|
||||||
|
f"status: {aggregated['status']}"
|
||||||
|
)
|
||||||
|
|
||||||
|
return aggregated
|
||||||
|
|
||||||
|
def _initialize_scanners(self) -> List[BaseScanner]:
|
||||||
|
"""Initialize scanner instances."""
|
||||||
|
scanners = []
|
||||||
|
scanner_config = settings.SCANNER_CONFIG
|
||||||
|
|
||||||
|
for scanner_class in self.scanner_classes:
|
||||||
|
try:
|
||||||
|
# Merge default config with scanner-specific config
|
||||||
|
config = {**self.config}
|
||||||
|
|
||||||
|
# Add scanner-specific config
|
||||||
|
if scanner_class == LighthouseScanner:
|
||||||
|
config['service_url'] = 'http://lighthouse:3001'
|
||||||
|
config['timeout'] = scanner_config.get('LIGHTHOUSE_TIMEOUT', 60)
|
||||||
|
elif scanner_class == ZAPScanner:
|
||||||
|
config['zap_host'] = scanner_config.get('ZAP_HOST')
|
||||||
|
config['api_key'] = scanner_config.get('ZAP_API_KEY')
|
||||||
|
config['timeout'] = scanner_config.get('ZAP_TIMEOUT', 120)
|
||||||
|
elif scanner_class == PlaywrightScanner:
|
||||||
|
config['timeout'] = scanner_config.get('PLAYWRIGHT_TIMEOUT', 30000)
|
||||||
|
config['viewport'] = scanner_config.get('PLAYWRIGHT_VIEWPORT', {'width': 1920, 'height': 1080})
|
||||||
|
|
||||||
|
scanner = scanner_class(config=config)
|
||||||
|
scanners.append(scanner)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Failed to initialize {scanner_class.__name__}: {e}")
|
||||||
|
|
||||||
|
return scanners
|
||||||
|
|
||||||
|
def _run_parallel(
|
||||||
|
self,
|
||||||
|
scanners: List[BaseScanner],
|
||||||
|
url: str
|
||||||
|
) -> Dict[str, ScannerResult]:
|
||||||
|
"""Run scanners in parallel using thread pool."""
|
||||||
|
results = {}
|
||||||
|
|
||||||
|
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
|
||||||
|
# Submit all scanner tasks
|
||||||
|
future_to_scanner = {
|
||||||
|
executor.submit(self._run_scanner, scanner, url): scanner
|
||||||
|
for scanner in scanners
|
||||||
|
}
|
||||||
|
|
||||||
|
# Collect results as they complete
|
||||||
|
for future in as_completed(future_to_scanner):
|
||||||
|
scanner = future_to_scanner[future]
|
||||||
|
try:
|
||||||
|
result = future.result()
|
||||||
|
results[scanner.name] = result
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Scanner {scanner.name} raised exception: {e}")
|
||||||
|
results[scanner.name] = ScannerResult(
|
||||||
|
scanner_name=scanner.name,
|
||||||
|
status=ScannerStatus.FAILED,
|
||||||
|
error_message=str(e)
|
||||||
|
)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
def _run_sequential(
|
||||||
|
self,
|
||||||
|
scanners: List[BaseScanner],
|
||||||
|
url: str
|
||||||
|
) -> Dict[str, ScannerResult]:
|
||||||
|
"""Run scanners sequentially."""
|
||||||
|
results = {}
|
||||||
|
|
||||||
|
for scanner in scanners:
|
||||||
|
result = self._run_scanner(scanner, url)
|
||||||
|
results[scanner.name] = result
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
def _run_scanner(self, scanner: BaseScanner, url: str) -> ScannerResult:
|
||||||
|
"""Run a single scanner with error handling."""
|
||||||
|
self.logger.info(f"Running scanner: {scanner.name}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Check availability first
|
||||||
|
if not scanner.is_available():
|
||||||
|
self.logger.warning(f"Scanner {scanner.name} is not available")
|
||||||
|
return ScannerResult(
|
||||||
|
scanner_name=scanner.name,
|
||||||
|
status=ScannerStatus.SKIPPED,
|
||||||
|
error_message=f"{scanner.name} service is not available"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Run the scanner
|
||||||
|
result = scanner.run(url)
|
||||||
|
self.logger.info(
|
||||||
|
f"Scanner {scanner.name} completed with status: {result.status}"
|
||||||
|
)
|
||||||
|
return result
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Scanner {scanner.name} failed: {e}")
|
||||||
|
return ScannerResult(
|
||||||
|
scanner_name=scanner.name,
|
||||||
|
status=ScannerStatus.FAILED,
|
||||||
|
error_message=str(e)
|
||||||
|
)
|
||||||
|
|
||||||
|
def _aggregate_results(
|
||||||
|
self,
|
||||||
|
results: Dict[str, ScannerResult]
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Aggregate results from all scanners."""
|
||||||
|
all_issues = []
|
||||||
|
all_metrics = []
|
||||||
|
all_scores = {}
|
||||||
|
raw_data = {}
|
||||||
|
errors = []
|
||||||
|
|
||||||
|
successful_scanners = 0
|
||||||
|
failed_scanners = 0
|
||||||
|
|
||||||
|
for scanner_name, result in results.items():
|
||||||
|
# Track scanner status
|
||||||
|
if result.status == ScannerStatus.SUCCESS:
|
||||||
|
successful_scanners += 1
|
||||||
|
elif result.status == ScannerStatus.FAILED:
|
||||||
|
failed_scanners += 1
|
||||||
|
if result.error_message:
|
||||||
|
errors.append({
|
||||||
|
'scanner': scanner_name,
|
||||||
|
'error': result.error_message
|
||||||
|
})
|
||||||
|
elif result.status == ScannerStatus.PARTIAL:
|
||||||
|
successful_scanners += 1
|
||||||
|
|
||||||
|
# Collect issues
|
||||||
|
for issue in result.issues:
|
||||||
|
all_issues.append({
|
||||||
|
'category': issue.category,
|
||||||
|
'severity': issue.severity,
|
||||||
|
'title': issue.title,
|
||||||
|
'description': issue.description,
|
||||||
|
'tool': issue.tool,
|
||||||
|
'affected_url': issue.affected_url,
|
||||||
|
'remediation': issue.remediation,
|
||||||
|
'raw_data': issue.raw_data,
|
||||||
|
})
|
||||||
|
|
||||||
|
# Collect metrics
|
||||||
|
for metric in result.metrics:
|
||||||
|
all_metrics.append({
|
||||||
|
'name': metric.name,
|
||||||
|
'display_name': metric.display_name,
|
||||||
|
'value': metric.value,
|
||||||
|
'unit': metric.unit,
|
||||||
|
'source': metric.source,
|
||||||
|
'score': metric.score,
|
||||||
|
})
|
||||||
|
|
||||||
|
# Collect scores
|
||||||
|
if result.scores:
|
||||||
|
all_scores[scanner_name] = result.scores
|
||||||
|
|
||||||
|
# Store raw data
|
||||||
|
if result.raw_data:
|
||||||
|
raw_data[scanner_name] = result.raw_data
|
||||||
|
|
||||||
|
# Determine overall status
|
||||||
|
if failed_scanners == len(results):
|
||||||
|
overall_status = 'failed'
|
||||||
|
elif failed_scanners > 0:
|
||||||
|
overall_status = 'partial'
|
||||||
|
else:
|
||||||
|
overall_status = 'done'
|
||||||
|
|
||||||
|
# Calculate aggregated scores
|
||||||
|
aggregated_scores = self._calculate_aggregated_scores(all_scores)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'status': overall_status,
|
||||||
|
'scores': aggregated_scores,
|
||||||
|
'issues': all_issues,
|
||||||
|
'metrics': all_metrics,
|
||||||
|
'scanner_results': {
|
||||||
|
name: {
|
||||||
|
'status': result.status.value,
|
||||||
|
'error': result.error_message,
|
||||||
|
}
|
||||||
|
for name, result in results.items()
|
||||||
|
},
|
||||||
|
'raw_data': raw_data,
|
||||||
|
'errors': errors,
|
||||||
|
'summary': {
|
||||||
|
'total_scanners': len(results),
|
||||||
|
'successful': successful_scanners,
|
||||||
|
'failed': failed_scanners,
|
||||||
|
'total_issues': len(all_issues),
|
||||||
|
'total_metrics': len(all_metrics),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _calculate_aggregated_scores(
|
||||||
|
self,
|
||||||
|
scanner_scores: Dict[str, Dict[str, int]]
|
||||||
|
) -> Dict[str, Optional[int]]:
|
||||||
|
"""Calculate aggregated scores from all scanners."""
|
||||||
|
# Lighthouse provides the main scores
|
||||||
|
lighthouse_scores = scanner_scores.get('lighthouse', {})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'performance': lighthouse_scores.get('performance'),
|
||||||
|
'accessibility': lighthouse_scores.get('accessibility'),
|
||||||
|
'best_practices': lighthouse_scores.get('best_practices'),
|
||||||
|
'seo': lighthouse_scores.get('seo'),
|
||||||
|
}
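# Note: only Lighthouse feeds these four headline scores; if it was skipped
# or failed, every value is None. A hypothetical result might look like
# {'performance': 92, 'accessibility': 88, 'best_practices': 100, 'seo': 90}.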
|
||||||
|
|
@ -0,0 +1,380 @@
|
||||||
|
"""
|
||||||
|
TLS/SSL Security Scanner.
|
||||||
|
|
||||||
|
This module checks TLS/SSL configuration and certificate validity.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import socket
|
||||||
|
import ssl
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from .base import (
|
||||||
|
BaseScanner,
|
||||||
|
ScannerResult,
|
||||||
|
ScannerStatus,
|
||||||
|
IssueData,
|
||||||
|
MetricData,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class TLSScanner(BaseScanner):
|
||||||
|
"""
|
||||||
|
Scanner for TLS/SSL certificate and configuration.
|
||||||
|
|
||||||
|
Checks:
|
||||||
|
- Certificate validity
|
||||||
|
- Certificate expiration
|
||||||
|
- HTTPS availability
|
||||||
|
- HTTP to HTTPS redirect
|
||||||
|
"""
|
||||||
|
|
||||||
|
name = "tls_check"
|
||||||
|
|
||||||
|
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||||
|
super().__init__(config)
|
||||||
|
self.timeout = self.config.get('timeout', 10)
|
||||||
|
|
||||||
|
def run(self, url: str) -> ScannerResult:
|
||||||
|
"""
|
||||||
|
Run TLS/SSL analysis on the URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to analyze
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ScannerResult with TLS findings
|
||||||
|
"""
|
||||||
|
self.logger.info(f"Starting TLS scan for {url}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
# urlparse().hostname strips ports and userinfo; fall back for odd inputs
hostname = parsed.hostname or parsed.netloc.split(':')[0]
|
||||||
|
port = parsed.port or (443 if parsed.scheme == 'https' else 80)
|
||||||
|
|
||||||
|
issues = []
|
||||||
|
metrics = []
|
||||||
|
raw_data = {}
|
||||||
|
|
||||||
|
# Check if site is HTTPS
|
||||||
|
if parsed.scheme == 'http':
|
||||||
|
# Check if HTTPS is available
|
||||||
|
https_available, https_result = self._check_https_available(hostname)
|
||||||
|
raw_data['https_available'] = https_available
|
||||||
|
raw_data['https_check'] = https_result
|
||||||
|
|
||||||
|
if https_available:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='tls',
|
||||||
|
severity='high',
|
||||||
|
title='Site accessed over HTTP but HTTPS is available',
|
||||||
|
description=(
|
||||||
|
'The site was accessed over unencrypted HTTP, but HTTPS '
|
||||||
|
'appears to be available. All traffic should use HTTPS.'
|
||||||
|
),
|
||||||
|
tool='tls_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
'Redirect all HTTP traffic to HTTPS using a 301 redirect. '
|
||||||
|
'Implement HSTS to prevent future HTTP access.'
|
||||||
|
)
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='tls',
|
||||||
|
severity='critical',
|
||||||
|
title='Site does not support HTTPS',
|
||||||
|
description=(
|
||||||
|
'The site does not appear to have HTTPS configured. '
|
||||||
|
'All data transmitted is unencrypted and vulnerable to interception.'
|
||||||
|
),
|
||||||
|
tool='tls_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
'Configure TLS/SSL for your server. Obtain a certificate from '
|
||||||
|
"Let's Encrypt (free) or a commercial CA."
|
||||||
|
)
|
||||||
|
))
|
||||||
|
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='tls_enabled',
|
||||||
|
display_name='TLS Enabled',
|
||||||
|
value=0.0,
|
||||||
|
unit='score',
|
||||||
|
source='tls_check'
|
||||||
|
))
|
||||||
|
|
||||||
|
return ScannerResult(
|
||||||
|
scanner_name=self.name,
|
||||||
|
status=ScannerStatus.SUCCESS,
|
||||||
|
issues=issues,
|
||||||
|
metrics=metrics,
|
||||||
|
raw_data=raw_data
|
||||||
|
)
|
||||||
|
|
||||||
|
# For HTTPS URLs, check certificate
|
||||||
|
cert_info = self._get_certificate_info(hostname, port)
|
||||||
|
raw_data['certificate'] = cert_info
|
||||||
|
|
||||||
|
if cert_info.get('error'):
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='tls',
|
||||||
|
severity='critical',
|
||||||
|
title='Certificate validation failed',
|
||||||
|
description=f"SSL certificate error: {cert_info['error']}",
|
||||||
|
tool='tls_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
'Ensure your SSL certificate is valid, not expired, '
|
||||||
|
'and properly configured for your domain.'
|
||||||
|
)
|
||||||
|
))
|
||||||
|
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='certificate_valid',
|
||||||
|
display_name='Certificate Valid',
|
||||||
|
value=0.0,
|
||||||
|
unit='score',
|
||||||
|
source='tls_check'
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
# Certificate is valid
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='certificate_valid',
|
||||||
|
display_name='Certificate Valid',
|
||||||
|
value=1.0,
|
||||||
|
unit='score',
|
||||||
|
source='tls_check'
|
||||||
|
))
|
||||||
|
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='tls_enabled',
|
||||||
|
display_name='TLS Enabled',
|
||||||
|
value=1.0,
|
||||||
|
unit='score',
|
||||||
|
source='tls_check'
|
||||||
|
))
|
||||||
|
|
||||||
|
# Check expiration
|
||||||
|
if cert_info.get('expires'):
|
||||||
|
try:
|
||||||
|
expires = datetime.strptime(
|
||||||
|
cert_info['expires'],
|
||||||
|
'%b %d %H:%M:%S %Y %Z'
|
||||||
|
)
|
||||||
|
expires = expires.replace(tzinfo=timezone.utc)
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
days_until_expiry = (expires - now).days
|
||||||
|
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='certificate_days_until_expiry',
|
||||||
|
display_name='Days Until Certificate Expiry',
|
||||||
|
value=float(days_until_expiry),
|
||||||
|
unit='count',
|
||||||
|
source='tls_check'
|
||||||
|
))
|
||||||
|
|
||||||
|
if days_until_expiry <= 0:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='tls',
|
||||||
|
severity='critical',
|
||||||
|
title='SSL certificate has expired',
|
||||||
|
description=(
|
||||||
|
f"The SSL certificate expired on {cert_info['expires']}. "
|
||||||
|
"Users will see security warnings."
|
||||||
|
),
|
||||||
|
tool='tls_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation='Renew your SSL certificate immediately.'
|
||||||
|
))
|
||||||
|
elif days_until_expiry <= 7:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='tls',
|
||||||
|
severity='high',
|
||||||
|
title='SSL certificate expiring very soon',
|
||||||
|
description=(
|
||||||
|
f"The SSL certificate will expire in {days_until_expiry} days "
|
||||||
|
f"(on {cert_info['expires']}). Renew immediately."
|
||||||
|
),
|
||||||
|
tool='tls_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation='Renew your SSL certificate before it expires.'
|
||||||
|
))
|
||||||
|
elif days_until_expiry <= 30:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='tls',
|
||||||
|
severity='medium',
|
||||||
|
title='SSL certificate expiring soon',
|
||||||
|
description=(
|
||||||
|
f"The SSL certificate will expire in {days_until_expiry} days "
|
||||||
|
f"(on {cert_info['expires']}). Plan for renewal."
|
||||||
|
),
|
||||||
|
tool='tls_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
'Renew your SSL certificate before expiration. '
|
||||||
|
"Consider using auto-renewal with Let's Encrypt."
|
||||||
|
)
|
||||||
|
))
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Could not parse certificate expiry: {e}")
|
||||||
|
|
||||||
|
# Check certificate subject matches hostname
|
||||||
|
if cert_info.get('subject'):
|
||||||
|
subject_cn = dict(x[0] for x in cert_info['subject']).get('commonName', '')
|
||||||
|
san = cert_info.get('subjectAltName', [])
|
||||||
|
san_names = [name for type_, name in san if type_ == 'DNS']
|
||||||
|
|
||||||
|
hostname_matched = self._hostname_matches_cert(
|
||||||
|
hostname, subject_cn, san_names
|
||||||
|
)
|
||||||
|
|
||||||
|
if not hostname_matched:
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='tls',
|
||||||
|
severity='high',
|
||||||
|
title='Certificate hostname mismatch',
|
||||||
|
description=(
|
||||||
|
f"The SSL certificate is for '{subject_cn}' but "
|
||||||
|
f"the site is accessed as '{hostname}'."
|
||||||
|
),
|
||||||
|
tool='tls_check',
|
||||||
|
affected_url=url,
|
||||||
|
remediation=(
|
||||||
|
'Obtain a certificate that includes your domain name, '
|
||||||
|
'or add it to the Subject Alternative Names (SAN).'
|
||||||
|
)
|
||||||
|
))
|
||||||
|
|
||||||
|
# Check for HTTP to HTTPS redirect
|
||||||
|
if parsed.scheme == 'https':
|
||||||
|
redirect_info = self._check_http_redirect(hostname)
|
||||||
|
raw_data['http_redirect'] = redirect_info
|
||||||
|
|
||||||
|
if not redirect_info.get('redirects_to_https'):
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='tls',
|
||||||
|
severity='medium',
|
||||||
|
title='No HTTP to HTTPS redirect',
|
||||||
|
description=(
|
||||||
|
'The site does not redirect HTTP requests to HTTPS. '
|
||||||
|
'Users accessing via HTTP will use an insecure connection.'
|
||||||
|
),
|
||||||
|
tool='tls_check',
|
||||||
|
affected_url=f"http://{hostname}",
|
||||||
|
remediation=(
|
||||||
|
'Configure your server to redirect all HTTP (port 80) '
|
||||||
|
'requests to HTTPS (port 443) with a 301 redirect.'
|
||||||
|
)
|
||||||
|
))
|
||||||
|
|
||||||
|
self.logger.info(f"TLS scan complete: {len(issues)} issues")
|
||||||
|
|
||||||
|
return ScannerResult(
|
||||||
|
scanner_name=self.name,
|
||||||
|
status=ScannerStatus.SUCCESS,
|
||||||
|
issues=issues,
|
||||||
|
metrics=metrics,
|
||||||
|
raw_data=raw_data
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
return self._create_error_result(e)
|
||||||
|
|
||||||
|
def _check_https_available(self, hostname: str) -> tuple:
|
||||||
|
"""Check if HTTPS is available for the hostname."""
|
||||||
|
try:
|
||||||
|
context = ssl.create_default_context()
|
||||||
|
with socket.create_connection((hostname, 443), timeout=self.timeout) as sock:
|
||||||
|
with context.wrap_socket(sock, server_hostname=hostname) as ssock:
|
||||||
|
return True, {'available': True, 'protocol': ssock.version()}
|
||||||
|
except ssl.SSLError as e:
|
||||||
|
return True, {'available': True, 'error': str(e)}
|
||||||
|
except Exception as e:
|
||||||
|
return False, {'available': False, 'error': str(e)}
|
||||||
|
|
||||||
|
def _get_certificate_info(self, hostname: str, port: int = 443) -> Dict:
|
||||||
|
"""Get SSL certificate information."""
|
||||||
|
try:
|
||||||
|
context = ssl.create_default_context()
|
||||||
|
|
||||||
|
with socket.create_connection((hostname, port), timeout=self.timeout) as sock:
|
||||||
|
with context.wrap_socket(sock, server_hostname=hostname) as ssock:
|
||||||
|
cert = ssock.getpeercert()
|
||||||
|
|
||||||
|
return {
|
||||||
|
'subject': cert.get('subject'),
|
||||||
|
'issuer': cert.get('issuer'),
|
||||||
|
'version': cert.get('version'),
|
||||||
|
'serialNumber': cert.get('serialNumber'),
|
||||||
|
'notBefore': cert.get('notBefore'),
|
||||||
|
'expires': cert.get('notAfter'),
|
||||||
|
'subjectAltName': cert.get('subjectAltName', []),
|
||||||
|
'protocol': ssock.version(),
|
||||||
|
'cipher': ssock.cipher(),
|
||||||
|
}
|
||||||
|
except ssl.SSLCertVerificationError as e:
|
||||||
|
return {'error': f"Certificate verification failed: {e.verify_message}"}
|
||||||
|
except ssl.SSLError as e:
|
||||||
|
return {'error': f"SSL error: {str(e)}"}
|
||||||
|
except socket.timeout:
|
||||||
|
return {'error': "Connection timed out"}
|
||||||
|
except Exception as e:
|
||||||
|
return {'error': str(e)}
|
||||||
|
|
||||||
|
def _hostname_matches_cert(
|
||||||
|
self,
|
||||||
|
hostname: str,
|
||||||
|
cn: str,
|
||||||
|
san_names: list
|
||||||
|
) -> bool:
|
||||||
|
"""Check if hostname matches certificate CN or SAN."""
|
||||||
|
all_names = [cn] + san_names
|
||||||
|
|
||||||
|
for name in all_names:
|
||||||
|
if name == hostname:
|
||||||
|
return True
|
||||||
|
# Handle wildcard certificates
|
||||||
|
            if name.startswith('*.'):
                domain = name[2:]
                # Ensure the wildcard matches exactly one label:
                # hostname must be "<label>.<domain>" and <label> may not contain a dot
                if hostname.endswith('.' + domain):
                    prefix = hostname[:-len(domain) - 1]
                    if prefix and '.' not in prefix:
                        return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _check_http_redirect(self, hostname: str) -> Dict:
|
||||||
|
"""Check if HTTP redirects to HTTPS."""
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
try:
|
||||||
|
with httpx.Client(
|
||||||
|
timeout=self.timeout,
|
||||||
|
follow_redirects=False
|
||||||
|
) as client:
|
||||||
|
response = client.get(f"http://{hostname}")
|
||||||
|
|
||||||
|
if response.status_code in (301, 302, 303, 307, 308):
|
||||||
|
location = response.headers.get('location', '')
|
||||||
|
redirects_to_https = location.startswith('https://')
|
||||||
|
return {
|
||||||
|
'redirects_to_https': redirects_to_https,
|
||||||
|
'status_code': response.status_code,
|
||||||
|
'location': location,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
'redirects_to_https': False,
|
||||||
|
'status_code': response.status_code,
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
return {
|
||||||
|
'redirects_to_https': False,
|
||||||
|
'error': str(e),
|
||||||
|
}
@ -0,0 +1,307 @@
|
||||||
|
"""
|
||||||
|
OWASP ZAP Scanner Integration.
|
||||||
|
|
||||||
|
This module integrates with OWASP ZAP for security scanning,
|
||||||
|
detecting vulnerabilities like XSS, injection flaws, and
|
||||||
|
misconfigurations.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from .base import (
|
||||||
|
BaseScanner,
|
||||||
|
ScannerResult,
|
||||||
|
ScannerStatus,
|
||||||
|
IssueData,
|
||||||
|
MetricData,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ZAPScanner(BaseScanner):
|
||||||
|
"""
|
||||||
|
Scanner using OWASP ZAP for security vulnerability detection.
|
||||||
|
|
||||||
|
Performs baseline scans to identify common security issues:
|
||||||
|
- XSS vulnerabilities
|
||||||
|
- SQL injection patterns
|
||||||
|
- Insecure cookies
|
||||||
|
- Missing security headers
|
||||||
|
- SSL/TLS issues
|
||||||
|
- And more...
|
||||||
|
"""
|
||||||
|
|
||||||
|
name = "owasp_zap"
|
||||||
|
|
||||||
|
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||||
|
super().__init__(config)
|
||||||
|
scanner_config = settings.SCANNER_CONFIG
|
||||||
|
self.zap_host = self.config.get('zap_host', scanner_config.get('ZAP_HOST', 'http://zap:8080'))
|
||||||
|
self.api_key = self.config.get('api_key', scanner_config.get('ZAP_API_KEY', ''))
|
||||||
|
self.timeout = self.config.get('timeout', scanner_config.get('ZAP_TIMEOUT', 120))
|
||||||
|
|
||||||
|
def is_available(self) -> bool:
|
||||||
|
"""Check if ZAP service is available."""
|
||||||
|
try:
|
||||||
|
with httpx.Client(timeout=10) as client:
|
||||||
|
response = client.get(
|
||||||
|
f"{self.zap_host}/JSON/core/view/version/",
|
||||||
|
params={'apikey': self.api_key}
|
||||||
|
)
|
||||||
|
return response.status_code == 200
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"ZAP service not available: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def run(self, url: str) -> ScannerResult:
|
||||||
|
"""
|
||||||
|
Run ZAP security scan against the URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to scan
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ScannerResult with security findings
|
||||||
|
"""
|
||||||
|
self.logger.info(f"Starting ZAP scan for {url}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Access the target to populate ZAP's site tree
|
||||||
|
self._access_url(url)
|
||||||
|
|
||||||
|
# Spider the site (limited crawl)
|
||||||
|
self._spider_url(url)
|
||||||
|
|
||||||
|
# Run active scan
|
||||||
|
self._active_scan(url)
|
||||||
|
|
||||||
|
# Get alerts
|
||||||
|
alerts = self._get_alerts(url)
|
||||||
|
|
||||||
|
return self._parse_results(url, alerts)
|
||||||
|
|
||||||
|
except httpx.TimeoutException:
|
||||||
|
return self._create_error_result(
|
||||||
|
Exception("ZAP scan timed out")
|
||||||
|
)
|
||||||
|
except httpx.HTTPStatusError as e:
|
||||||
|
return self._create_error_result(
|
||||||
|
Exception(f"ZAP service error: {e.response.status_code}")
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
return self._create_error_result(e)
|
||||||
|
|
||||||
|
def _zap_request(self, endpoint: str, params: Optional[Dict] = None) -> Dict:
|
||||||
|
"""Make a request to the ZAP API."""
|
||||||
|
if params is None:
|
||||||
|
params = {}
|
||||||
|
params['apikey'] = self.api_key
|
||||||
|
|
||||||
|
with httpx.Client(timeout=self.timeout) as client:
|
||||||
|
response = client.get(
|
||||||
|
f"{self.zap_host}{endpoint}",
|
||||||
|
params=params
|
||||||
|
)
|
||||||
|
response.raise_for_status()
|
||||||
|
return response.json()
|
||||||
|
|
||||||
|
def _access_url(self, url: str) -> None:
|
||||||
|
"""Access the URL to add it to ZAP's site tree."""
|
||||||
|
self.logger.debug(f"Accessing URL in ZAP: {url}")
|
||||||
|
self._zap_request(
|
||||||
|
'/JSON/core/action/accessUrl/',
|
||||||
|
{'url': url, 'followRedirects': 'true'}
|
||||||
|
)
|
||||||
|
time.sleep(2) # Wait for ZAP to process
|
||||||
|
|
||||||
|
def _spider_url(self, url: str) -> None:
|
||||||
|
"""Spider the URL to discover pages."""
|
||||||
|
self.logger.debug(f"Spidering URL: {url}")
|
||||||
|
|
||||||
|
# Start spider
|
||||||
|
result = self._zap_request(
|
||||||
|
'/JSON/spider/action/scan/',
|
||||||
|
{
|
||||||
|
'url': url,
|
||||||
|
'maxChildren': '5', # Limited crawl
|
||||||
|
'recurse': 'true',
|
||||||
|
'subtreeOnly': 'true'
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
scan_id = result.get('scan')
|
||||||
|
if not scan_id:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Wait for spider to complete (with timeout)
|
||||||
|
start_time = time.time()
|
||||||
|
while time.time() - start_time < 60: # 60 second spider timeout
|
||||||
|
status = self._zap_request(
|
||||||
|
'/JSON/spider/view/status/',
|
||||||
|
{'scanId': scan_id}
|
||||||
|
)
|
||||||
|
if int(status.get('status', '100')) >= 100:
|
||||||
|
break
|
||||||
|
time.sleep(2)
|
||||||
|
|
||||||
|
def _active_scan(self, url: str) -> None:
|
||||||
|
"""Run active scan against the URL."""
|
||||||
|
self.logger.debug(f"Starting active scan: {url}")
|
||||||
|
|
||||||
|
# Start active scan
|
||||||
|
result = self._zap_request(
|
||||||
|
'/JSON/ascan/action/scan/',
|
||||||
|
{
|
||||||
|
'url': url,
|
||||||
|
'recurse': 'true',
|
||||||
|
'inScopeOnly': 'true'
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
scan_id = result.get('scan')
|
||||||
|
if not scan_id:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Wait for scan to complete (with timeout)
|
||||||
|
start_time = time.time()
|
||||||
|
while time.time() - start_time < self.timeout:
|
||||||
|
status = self._zap_request(
|
||||||
|
'/JSON/ascan/view/status/',
|
||||||
|
{'scanId': scan_id}
|
||||||
|
)
|
||||||
|
if int(status.get('status', '100')) >= 100:
|
||||||
|
break
|
||||||
|
time.sleep(5)
|
||||||
|
|
||||||
|
def _get_alerts(self, url: str) -> List[Dict]:
|
||||||
|
"""Get alerts for the scanned URL."""
|
||||||
|
self.logger.debug(f"Fetching alerts for: {url}")
|
||||||
|
|
||||||
|
result = self._zap_request(
|
||||||
|
'/JSON/core/view/alerts/',
|
||||||
|
{
|
||||||
|
'baseurl': url,
|
||||||
|
'start': '0',
|
||||||
|
'count': '100' # Limit alerts
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return result.get('alerts', [])
|
||||||
|
|
||||||
|
def _parse_results(self, url: str, alerts: List[Dict]) -> ScannerResult:
|
||||||
|
"""
|
||||||
|
Parse ZAP alerts into ScannerResult format.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The scanned URL
|
||||||
|
alerts: List of ZAP alerts
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Parsed ScannerResult
|
||||||
|
"""
|
||||||
|
issues = []
|
||||||
|
metrics = []
|
||||||
|
|
||||||
|
# Count alerts by risk level
|
||||||
|
risk_counts = {
|
||||||
|
'High': 0,
|
||||||
|
'Medium': 0,
|
||||||
|
'Low': 0,
|
||||||
|
'Informational': 0
|
||||||
|
}
|
||||||
|
|
||||||
|
for alert in alerts:
|
||||||
|
risk = alert.get('risk', 'Informational')
|
||||||
|
risk_counts[risk] = risk_counts.get(risk, 0) + 1
|
||||||
|
|
||||||
|
severity = self._map_risk_to_severity(risk)
|
||||||
|
|
||||||
|
issues.append(IssueData(
|
||||||
|
category='security',
|
||||||
|
severity=severity,
|
||||||
|
title=alert.get('name', 'Unknown vulnerability'),
|
||||||
|
description=self._format_description(alert),
|
||||||
|
tool='owasp_zap',
|
||||||
|
affected_url=alert.get('url', url),
|
||||||
|
remediation=alert.get('solution', 'Review and fix the vulnerability.'),
|
||||||
|
raw_data={
|
||||||
|
'alert_ref': alert.get('alertRef'),
|
||||||
|
'cweid': alert.get('cweid'),
|
||||||
|
'wascid': alert.get('wascid'),
|
||||||
|
'confidence': alert.get('confidence'),
|
||||||
|
                    'evidence': (alert.get('evidence') or '')[:500],  # Truncate; tolerate null evidence
|
||||||
|
}
|
||||||
|
))
|
||||||
|
|
||||||
|
# Create metrics for vulnerability counts
|
||||||
|
for risk_level, count in risk_counts.items():
|
||||||
|
if count > 0:
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name=f'zap_{risk_level.lower()}_alerts',
|
||||||
|
display_name=f'{risk_level} Risk Alerts',
|
||||||
|
value=float(count),
|
||||||
|
unit='count',
|
||||||
|
source='owasp_zap'
|
||||||
|
))
|
||||||
|
|
||||||
|
metrics.append(MetricData(
|
||||||
|
name='total_security_alerts',
|
||||||
|
display_name='Total Security Alerts',
|
||||||
|
value=float(len(alerts)),
|
||||||
|
unit='count',
|
||||||
|
source='owasp_zap'
|
||||||
|
))
|
||||||
|
|
||||||
|
self.logger.info(
|
||||||
|
f"ZAP scan complete: {len(alerts)} alerts "
|
||||||
|
f"(High: {risk_counts['High']}, Medium: {risk_counts['Medium']}, "
|
||||||
|
f"Low: {risk_counts['Low']})"
|
||||||
|
)
|
||||||
|
|
||||||
|
return ScannerResult(
|
||||||
|
scanner_name=self.name,
|
||||||
|
status=ScannerStatus.SUCCESS,
|
||||||
|
issues=issues,
|
||||||
|
metrics=metrics,
|
||||||
|
raw_data={
|
||||||
|
'total_alerts': len(alerts),
|
||||||
|
'risk_counts': risk_counts,
|
||||||
|
'alerts': alerts[:50] # Store limited raw alerts
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
def _map_risk_to_severity(self, risk: str) -> str:
|
||||||
|
"""Map ZAP risk level to our severity."""
|
||||||
|
mapping = {
|
||||||
|
'High': 'high',
|
||||||
|
'Medium': 'medium',
|
||||||
|
'Low': 'low',
|
||||||
|
'Informational': 'info',
|
||||||
|
}
|
||||||
|
return mapping.get(risk, 'info')
|
||||||
|
|
||||||
|
def _format_description(self, alert: Dict) -> str:
|
||||||
|
"""Format ZAP alert into readable description."""
|
||||||
|
parts = []
|
||||||
|
|
||||||
|
if alert.get('description'):
|
||||||
|
parts.append(alert['description'])
|
||||||
|
|
||||||
|
if alert.get('attack'):
|
||||||
|
parts.append(f"\nAttack: {alert['attack']}")
|
||||||
|
|
||||||
|
if alert.get('evidence'):
|
||||||
|
evidence = alert['evidence'][:200]
|
||||||
|
parts.append(f"\nEvidence: {evidence}")
|
||||||
|
|
||||||
|
if alert.get('reference'):
|
||||||
|
parts.append(f"\nReference: {alert['reference']}")
|
||||||
|
|
||||||
|
return '\n'.join(parts)
@ -0,0 +1,306 @@
|
||||||
|
"""
|
||||||
|
Celery tasks for background scanning.
|
||||||
|
|
||||||
|
This module defines the Celery tasks that orchestrate website scans
|
||||||
|
in the background.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from datetime import timedelta
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from celery import shared_task
|
||||||
|
from celery.exceptions import SoftTimeLimitExceeded
|
||||||
|
from django.conf import settings
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
|
from websites.models import Website, Scan, ScanStatus, Issue, Metric
|
||||||
|
from scanner.scanners import ScanRunner
|
||||||
|
from scanner.utils import validate_url, get_domain_from_url
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@shared_task(
|
||||||
|
bind=True,
|
||||||
|
max_retries=2,
|
||||||
|
default_retry_delay=60,
|
||||||
|
soft_time_limit=300,
|
||||||
|
time_limit=330,
|
||||||
|
)
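# Celery raises SoftTimeLimitExceeded inside the task at soft_time_limit (300s), giving the
# handler below a chance to record a partial result; time_limit (330s) hard-kills the worker
# process 30 seconds later if the task is still running.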
|
||||||
|
def run_scan_task(self, scan_id: str) -> dict:
|
||||||
|
"""
|
||||||
|
Main Celery task for running a website scan.
|
||||||
|
|
||||||
|
This task:
|
||||||
|
1. Updates scan status to running
|
||||||
|
2. Orchestrates all scanners
|
||||||
|
3. Saves results to database
|
||||||
|
4. Handles errors and partial results
|
||||||
|
|
||||||
|
Args:
|
||||||
|
scan_id: UUID of the Scan record
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with scan results summary
|
||||||
|
"""
|
||||||
|
logger.info(f"Starting scan task for scan_id: {scan_id}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Get the scan record
|
||||||
|
scan = Scan.objects.select_related('website').get(id=scan_id)
|
||||||
|
except Scan.DoesNotExist:
|
||||||
|
logger.error(f"Scan {scan_id} not found")
|
||||||
|
return {'error': f'Scan {scan_id} not found'}
|
||||||
|
|
||||||
|
# Update status to running
|
||||||
|
scan.status = ScanStatus.RUNNING
|
||||||
|
scan.started_at = timezone.now()
|
||||||
|
scan.celery_task_id = self.request.id
|
||||||
|
scan.save(update_fields=['status', 'started_at', 'celery_task_id'])
|
||||||
|
|
||||||
|
url = scan.website.url
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Run the scan pipeline
|
||||||
|
runner = ScanRunner()
|
||||||
|
results = runner.run(url)
|
||||||
|
|
||||||
|
# Save results to database
|
||||||
|
_save_scan_results(scan, results)
|
||||||
|
|
||||||
|
# Update website last_scanned_at
|
||||||
|
scan.website.last_scanned_at = timezone.now()
|
||||||
|
scan.website.save(update_fields=['last_scanned_at'])
|
||||||
|
|
||||||
|
logger.info(f"Scan {scan_id} completed successfully")
|
||||||
|
|
||||||
|
return {
|
||||||
|
'scan_id': str(scan_id),
|
||||||
|
'status': scan.status,
|
||||||
|
'overall_score': scan.overall_score,
|
||||||
|
'issues_count': scan.issues.count(),
|
||||||
|
'metrics_count': scan.metrics.count(),
|
||||||
|
}
|
||||||
|
|
||||||
|
except SoftTimeLimitExceeded:
|
||||||
|
logger.warning(f"Scan {scan_id} timed out")
|
||||||
|
scan.status = ScanStatus.PARTIAL
|
||||||
|
scan.error_message = "Scan timed out before completing all checks"
|
||||||
|
scan.completed_at = timezone.now()
|
||||||
|
scan.save(update_fields=['status', 'error_message', 'completed_at'])
|
||||||
|
|
||||||
|
return {
|
||||||
|
'scan_id': str(scan_id),
|
||||||
|
'status': 'partial',
|
||||||
|
'error': 'Scan timed out'
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"Scan {scan_id} failed with error: {e}")
|
||||||
|
scan.status = ScanStatus.FAILED
|
||||||
|
scan.error_message = str(e)
|
||||||
|
scan.completed_at = timezone.now()
|
||||||
|
scan.save(update_fields=['status', 'error_message', 'completed_at'])
|
||||||
|
|
||||||
|
# Retry on certain errors
|
||||||
|
if self.request.retries < self.max_retries:
|
||||||
|
raise self.retry(exc=e)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'scan_id': str(scan_id),
|
||||||
|
'status': 'failed',
|
||||||
|
'error': str(e)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _save_scan_results(scan: Scan, results: dict) -> None:
|
||||||
|
"""
|
||||||
|
Save scan results to the database.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
scan: The Scan model instance
|
||||||
|
results: Aggregated results from ScanRunner
|
||||||
|
"""
|
||||||
|
# Update scan status
|
||||||
|
status_map = {
|
||||||
|
'done': ScanStatus.DONE,
|
||||||
|
'partial': ScanStatus.PARTIAL,
|
||||||
|
'failed': ScanStatus.FAILED,
|
||||||
|
}
|
||||||
|
scan.status = status_map.get(results['status'], ScanStatus.DONE)
|
||||||
|
scan.completed_at = timezone.now()
|
||||||
|
|
||||||
|
# Save scores
|
||||||
|
scores = results.get('scores', {})
|
||||||
|
scan.performance_score = scores.get('performance')
|
||||||
|
scan.accessibility_score = scores.get('accessibility')
|
||||||
|
scan.seo_score = scores.get('seo')
|
||||||
|
scan.best_practices_score = scores.get('best_practices')
|
||||||
|
|
||||||
|
# Save raw data
|
||||||
|
raw_data = results.get('raw_data', {})
|
||||||
|
scan.raw_lighthouse_data = raw_data.get('lighthouse')
|
||||||
|
scan.raw_zap_data = raw_data.get('owasp_zap')
|
||||||
|
scan.raw_playwright_data = raw_data.get('playwright')
|
||||||
|
scan.raw_headers_data = raw_data.get('header_check')
|
||||||
|
|
||||||
|
# Save errors if any
|
||||||
|
if results.get('errors'):
|
||||||
|
scan.error_message = '\n'.join(
|
||||||
|
f"{e['scanner']}: {e['error']}"
|
||||||
|
for e in results['errors']
|
||||||
|
)
|
||||||
|
|
||||||
|
scan.save()
|
||||||
|
|
||||||
|
# Create Issue records
|
||||||
|
issues_to_create = []
|
||||||
|
for issue_data in results.get('issues', []):
|
||||||
|
issues_to_create.append(Issue(
|
||||||
|
scan=scan,
|
||||||
|
category=issue_data['category'],
|
||||||
|
severity=issue_data['severity'],
|
||||||
|
title=issue_data['title'][:500], # Truncate if too long
|
||||||
|
description=issue_data['description'],
|
||||||
|
tool=issue_data['tool'],
|
||||||
|
affected_url=issue_data.get('affected_url'),
|
||||||
|
remediation=issue_data.get('remediation'),
|
||||||
|
raw_data=issue_data.get('raw_data'),
|
||||||
|
))
|
||||||
|
|
||||||
|
if issues_to_create:
|
||||||
|
Issue.objects.bulk_create(issues_to_create)
|
||||||
|
|
||||||
|
# Create Metric records
|
||||||
|
metrics_to_create = []
|
||||||
|
seen_metrics = set() # Track unique metrics
|
||||||
|
|
||||||
|
for metric_data in results.get('metrics', []):
|
||||||
|
metric_key = metric_data['name']
|
||||||
|
if metric_key in seen_metrics:
|
||||||
|
continue # Skip duplicates
|
||||||
|
seen_metrics.add(metric_key)
|
||||||
|
|
||||||
|
# Map unit strings to model choices
|
||||||
|
unit_map = {
|
||||||
|
'ms': 'ms',
|
||||||
|
'milliseconds': 'ms',
|
||||||
|
's': 's',
|
||||||
|
'seconds': 's',
|
||||||
|
'bytes': 'bytes',
|
||||||
|
'kb': 'kb',
|
||||||
|
'kilobytes': 'kb',
|
||||||
|
'mb': 'mb',
|
||||||
|
'megabytes': 'mb',
|
||||||
|
'score': 'score',
|
||||||
|
'percent': 'percent',
|
||||||
|
'count': 'count',
|
||||||
|
}
|
||||||
|
unit = unit_map.get(metric_data['unit'].lower(), 'count')
|
||||||
|
|
||||||
|
metrics_to_create.append(Metric(
|
||||||
|
scan=scan,
|
||||||
|
name=metric_data['name'],
|
||||||
|
display_name=metric_data['display_name'][:200],
|
||||||
|
value=metric_data['value'],
|
||||||
|
unit=unit,
|
||||||
|
source=metric_data['source'],
|
||||||
|
score=metric_data.get('score'),
|
||||||
|
))
|
||||||
|
|
||||||
|
if metrics_to_create:
|
||||||
|
Metric.objects.bulk_create(metrics_to_create)
|
||||||
|
|
||||||
|
# Calculate security score based on issues
|
||||||
|
scan.calculate_security_score()
|
||||||
|
|
||||||
|
# Calculate overall score
|
||||||
|
scan.calculate_overall_score()
|
||||||
|
|
||||||
|
scan.save(update_fields=['security_score', 'overall_score'])
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Saved scan results: {len(issues_to_create)} issues, "
|
||||||
|
f"{len(metrics_to_create)} metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@shared_task
|
||||||
|
def cleanup_old_scans(days: int = 30) -> dict:
|
||||||
|
"""
|
||||||
|
Clean up old scan data to prevent database growth.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
days: Number of days to keep scans
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with cleanup statistics
|
||||||
|
"""
|
||||||
|
cutoff_date = timezone.now() - timedelta(days=days)
|
||||||
|
|
||||||
|
# Delete old scans (cascades to issues and metrics)
|
||||||
|
deleted_count, _ = Scan.objects.filter(
|
||||||
|
created_at__lt=cutoff_date
|
||||||
|
).delete()
|
||||||
|
|
||||||
|
logger.info(f"Cleaned up {deleted_count} old scans")
|
||||||
|
|
||||||
|
return {
|
||||||
|
'deleted_scans': deleted_count,
|
||||||
|
'cutoff_date': cutoff_date.isoformat(),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def check_rate_limit(url: str) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Check if URL scanning is rate limited.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to check
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Error message if rate limited, None otherwise
|
||||||
|
"""
|
||||||
|
from django.core.cache import cache
|
||||||
|
|
||||||
|
scanner_config = settings.SCANNER_CONFIG
|
||||||
|
rate_limit_minutes = scanner_config.get('SCAN_RATE_LIMIT_MINUTES', 5)
|
||||||
|
|
||||||
|
# Create a cache key based on the URL
|
||||||
|
domain = get_domain_from_url(url)
|
||||||
|
cache_key = f"scan_rate_limit:{domain}"
|
||||||
|
|
||||||
|
# Check if already scanned recently
|
||||||
|
last_scan_time = cache.get(cache_key)
|
||||||
|
if last_scan_time:
|
||||||
|
return (
|
||||||
|
f"This URL was scanned recently. "
|
||||||
|
f"Please wait {rate_limit_minutes} minutes between scans."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Set the rate limit
|
||||||
|
cache.set(cache_key, timezone.now().isoformat(), timeout=rate_limit_minutes * 60)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def check_concurrent_scan_limit() -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Check if maximum concurrent scans limit is reached.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Error message if limit reached, None otherwise
|
||||||
|
"""
|
||||||
|
scanner_config = settings.SCANNER_CONFIG
|
||||||
|
max_concurrent = scanner_config.get('MAX_CONCURRENT_SCANS', 3)
|
||||||
|
|
||||||
|
running_count = Scan.objects.filter(status=ScanStatus.RUNNING).count()
|
||||||
|
|
||||||
|
if running_count >= max_concurrent:
|
||||||
|
return (
|
||||||
|
f"Maximum concurrent scans ({max_concurrent}) reached. "
|
||||||
|
"Please wait for current scans to complete."
|
||||||
|
)
|
||||||
|
|
||||||
|
return None
@ -0,0 +1,185 @@
|
||||||
|
"""
|
||||||
|
URL validation and safety utilities.
|
||||||
|
|
||||||
|
This module provides functions for validating and normalizing URLs,
|
||||||
|
including safety checks to prevent SSRF attacks.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import ipaddress
|
||||||
|
import logging
|
||||||
|
import socket
|
||||||
|
from typing import Tuple
|
||||||
|
from urllib.parse import urlparse, urlunparse
|
||||||
|
|
||||||
|
import validators
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def validate_url(url: str) -> Tuple[bool, str]:
|
||||||
|
"""
|
||||||
|
Validate and normalize a URL for scanning.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to validate
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (is_valid, normalized_url_or_error_message)
|
||||||
|
"""
|
||||||
|
if not url:
|
||||||
|
return False, "URL is required"
|
||||||
|
|
||||||
|
# Basic URL validation
|
||||||
|
if not validators.url(url):
|
||||||
|
return False, "Invalid URL format"
|
||||||
|
|
||||||
|
# Parse the URL
|
||||||
|
try:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
except Exception as e:
|
||||||
|
return False, f"Could not parse URL: {e}"
|
||||||
|
|
||||||
|
# Check scheme
|
||||||
|
if parsed.scheme not in ('http', 'https'):
|
||||||
|
return False, "URL must use http or https scheme"
|
||||||
|
|
||||||
|
# Check hostname
|
||||||
|
hostname = parsed.netloc.split(':')[0].lower()
|
||||||
|
|
||||||
|
if not hostname:
|
||||||
|
return False, "URL must have a valid hostname"
|
||||||
|
|
||||||
|
# Safety check: block localhost and private IPs
|
||||||
|
is_safe, safety_error = check_url_safety(hostname)
|
||||||
|
if not is_safe:
|
||||||
|
return False, safety_error
|
||||||
|
|
||||||
|
# Normalize URL
|
||||||
|
normalized = normalize_url(url)
|
||||||
|
|
||||||
|
return True, normalized
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_url(url: str) -> str:
|
||||||
|
"""
|
||||||
|
Normalize a URL to a canonical form.
|
||||||
|
|
||||||
|
- Lowercase hostname
|
||||||
|
- Remove trailing slashes from path
|
||||||
|
- Remove default ports
|
||||||
|
- Sort query parameters
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to normalize
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Normalized URL string
|
||||||
|
"""
|
||||||
|
parsed = urlparse(url)
|
||||||
|
|
||||||
|
# Lowercase hostname
|
||||||
|
hostname = parsed.netloc.lower()
|
||||||
|
|
||||||
|
# Remove default ports
|
||||||
|
    if parsed.scheme == 'http' and hostname.endswith(':80'):
        hostname = hostname[:-len(':80')]
    elif parsed.scheme == 'https' and hostname.endswith(':443'):
        hostname = hostname[:-len(':443')]
|
||||||
|
|
||||||
|
# Normalize path (remove trailing slash except for root)
|
||||||
|
path = parsed.path
|
||||||
|
if path != '/' and path.endswith('/'):
|
||||||
|
path = path.rstrip('/')
|
||||||
|
if not path:
|
||||||
|
path = '/'
|
||||||
|
|
||||||
|
# Reconstruct URL
|
||||||
|
normalized = urlunparse((
|
||||||
|
parsed.scheme,
|
||||||
|
hostname,
|
||||||
|
path,
|
||||||
|
parsed.params,
|
||||||
|
parsed.query,
|
||||||
|
'' # Remove fragment
|
||||||
|
))
|
||||||
|
|
||||||
|
return normalized
|
||||||
|
|
||||||
|
|
||||||
|
def check_url_safety(hostname: str) -> Tuple[bool, str]:
|
||||||
|
"""
|
||||||
|
Check if a hostname is safe to scan (not localhost/private IP).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
hostname: The hostname to check
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (is_safe, error_message_if_not_safe)
|
||||||
|
"""
|
||||||
|
scanner_config = settings.SCANNER_CONFIG
|
||||||
|
blocked_hosts = scanner_config.get('BLOCKED_HOSTS', [])
|
||||||
|
blocked_ranges = scanner_config.get('BLOCKED_IP_RANGES', [])
|
||||||
|
|
||||||
|
# Check blocked hostnames
|
||||||
|
if hostname in blocked_hosts:
|
||||||
|
return False, f"Scanning {hostname} is not allowed"
|
||||||
|
|
||||||
|
# Try to resolve hostname to IP
|
||||||
|
try:
|
||||||
|
ip_addresses = socket.getaddrinfo(
|
||||||
|
hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM
|
||||||
|
)
|
||||||
|
except socket.gaierror:
|
||||||
|
# Could not resolve - might be okay for some hostnames
|
||||||
|
logger.warning(f"Could not resolve hostname: {hostname}")
|
||||||
|
return True, ""
|
||||||
|
|
||||||
|
for family, type_, proto, canonname, sockaddr in ip_addresses:
|
||||||
|
ip_str = sockaddr[0]
|
||||||
|
|
||||||
|
try:
|
||||||
|
ip = ipaddress.ip_address(ip_str)
|
||||||
|
|
||||||
|
# Check if IP is in any blocked range
|
||||||
|
for blocked_range in blocked_ranges:
|
||||||
|
try:
|
||||||
|
network = ipaddress.ip_network(blocked_range, strict=False)
|
||||||
|
if ip in network:
|
||||||
|
return False, f"Scanning private/local IP addresses is not allowed ({ip_str})"
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Additional checks
|
||||||
|
if ip.is_private:
|
||||||
|
return False, f"Scanning private IP addresses is not allowed ({ip_str})"
|
||||||
|
|
||||||
|
if ip.is_loopback:
|
||||||
|
return False, f"Scanning localhost/loopback addresses is not allowed ({ip_str})"
|
||||||
|
|
||||||
|
if ip.is_link_local:
|
||||||
|
return False, f"Scanning link-local addresses is not allowed ({ip_str})"
|
||||||
|
|
||||||
|
if ip.is_reserved:
|
||||||
|
return False, f"Scanning reserved IP addresses is not allowed ({ip_str})"
|
||||||
|
|
||||||
|
except ValueError:
|
||||||
|
# Not a valid IP address format
|
||||||
|
continue
|
||||||
|
|
||||||
|
return True, ""
|
||||||
|
|
||||||
|
|
||||||
|
def get_domain_from_url(url: str) -> str:
|
||||||
|
"""
|
||||||
|
Extract the domain from a URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to extract domain from
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The domain/hostname
|
||||||
|
"""
|
||||||
|
parsed = urlparse(url)
|
||||||
|
return parsed.netloc.split(':')[0].lower()
@ -0,0 +1,89 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>{% block title %}Website Analyzer{% endblock %}</title>
|
||||||
|
|
||||||
|
<!-- Tailwind CSS -->
|
||||||
|
<script src="https://cdn.tailwindcss.com"></script>
|
||||||
|
|
||||||
|
<!-- Alpine.js for interactivity -->
|
||||||
|
<script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
|
||||||
|
|
||||||
|
<!-- Chart.js for visualizations -->
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
[x-cloak] { display: none !important; }
|
||||||
|
|
||||||
|
/* Custom animations */
|
||||||
|
@keyframes pulse-slow {
|
||||||
|
0%, 100% { opacity: 1; }
|
||||||
|
50% { opacity: 0.5; }
|
||||||
|
}
|
||||||
|
.animate-pulse-slow {
|
||||||
|
animation: pulse-slow 2s cubic-bezier(0.4, 0, 0.6, 1) infinite;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Score circle gradient */
|
||||||
|
.score-circle {
|
||||||
|
background: conic-gradient(
|
||||||
|
var(--score-color) calc(var(--score) * 3.6deg),
|
||||||
|
#e5e7eb calc(var(--score) * 3.6deg)
|
||||||
|
);
|
||||||
|
}
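        /* Illustrative markup (rendered by the page templates, not defined here):
           <div class="score-circle" style="--score: 85; --score-color: #16a34a">...</div> */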
|
||||||
|
</style>
|
||||||
|
|
||||||
|
{% block extra_head %}{% endblock %}
|
||||||
|
</head>
|
||||||
|
<body class="bg-gray-50 min-h-screen">
|
||||||
|
<!-- Navigation -->
|
||||||
|
<nav class="bg-white shadow-sm border-b border-gray-200">
|
||||||
|
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
|
||||||
|
<div class="flex justify-between h-16">
|
||||||
|
<div class="flex items-center">
|
||||||
|
<a href="/" class="flex items-center space-x-2">
|
||||||
|
<svg class="w-8 h-8 text-blue-600" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
|
||||||
|
d="M9 12l2 2 4-4m5.618-4.016A11.955 11.955 0 0112 2.944a11.955 11.955 0 01-8.618 3.04A12.02 12.02 0 003 9c0 5.591 3.824 10.29 9 11.622 5.176-1.332 9-6.03 9-11.622 0-1.042-.133-2.052-.382-3.016z"/>
|
||||||
|
</svg>
|
||||||
|
<span class="font-bold text-xl text-gray-900">Website Analyzer</span>
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
<div class="flex items-center space-x-4">
|
||||||
|
<a href="/" class="text-gray-600 hover:text-gray-900 px-3 py-2 rounded-md text-sm font-medium">
|
||||||
|
New Scan
|
||||||
|
</a>
|
||||||
|
<a href="/api/" class="text-gray-600 hover:text-gray-900 px-3 py-2 rounded-md text-sm font-medium">
|
||||||
|
API
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
<!-- Main Content -->
|
||||||
|
<main class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8">
|
||||||
|
{% block content %}{% endblock %}
|
||||||
|
</main>
|
||||||
|
|
||||||
|
<!-- Footer -->
|
||||||
|
<footer class="bg-white border-t border-gray-200 mt-auto">
|
||||||
|
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-6">
|
||||||
|
<div class="flex justify-between items-center">
|
||||||
|
<p class="text-gray-500 text-sm">
|
||||||
|
Website Analyzer - Security & Performance Scanner
|
||||||
|
</p>
|
||||||
|
<div class="flex space-x-4">
|
||||||
|
<a href="/api/health/" class="text-gray-400 hover:text-gray-600 text-sm">
|
||||||
|
Health Check
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</footer>
|
||||||
|
|
||||||
|
{% block extra_js %}{% endblock %}
|
||||||
|
</body>
|
||||||
|
</html>
@ -0,0 +1,5 @@
|
||||||
|
"""
|
||||||
|
Websites app initialization.
|
||||||
|
"""
|
||||||
|
|
||||||
|
default_app_config = 'websites.apps.WebsitesConfig'
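# Note: default_app_config is deprecated since Django 3.2 and ignored on 4.1+;
# WebsitesConfig is discovered automatically from websites/apps.py on modern versions.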
@ -0,0 +1,93 @@
|
||||||
|
"""
|
||||||
|
Django admin configuration for Website Analyzer models.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from django.contrib import admin
|
||||||
|
from .models import Website, Scan, Issue, Metric
|
||||||
|
|
||||||
|
|
||||||
|
@admin.register(Website)
|
||||||
|
class WebsiteAdmin(admin.ModelAdmin):
|
||||||
|
list_display = ('url', 'domain', 'created_at', 'last_scanned_at')
|
||||||
|
list_filter = ('created_at', 'last_scanned_at')
|
||||||
|
search_fields = ('url', 'domain')
|
||||||
|
readonly_fields = ('id', 'created_at', 'domain')
|
||||||
|
ordering = ('-created_at',)
|
||||||
|
|
||||||
|
|
||||||
|
class IssueInline(admin.TabularInline):
|
||||||
|
model = Issue
|
||||||
|
extra = 0
|
||||||
|
readonly_fields = ('id', 'category', 'severity', 'tool', 'title', 'created_at')
|
||||||
|
can_delete = False
|
||||||
|
show_change_link = True
|
||||||
|
max_num = 10
|
||||||
|
|
||||||
|
|
||||||
|
class MetricInline(admin.TabularInline):
|
||||||
|
model = Metric
|
||||||
|
extra = 0
|
||||||
|
readonly_fields = ('id', 'name', 'display_name', 'value', 'unit', 'source', 'score')
|
||||||
|
can_delete = False
|
||||||
|
max_num = 15
|
||||||
|
|
||||||
|
|
||||||
|
@admin.register(Scan)
|
||||||
|
class ScanAdmin(admin.ModelAdmin):
|
||||||
|
list_display = (
|
||||||
|
'id', 'website', 'status', 'overall_score',
|
||||||
|
'performance_score', 'security_score', 'created_at'
|
||||||
|
)
|
||||||
|
list_filter = ('status', 'created_at')
|
||||||
|
search_fields = ('website__url', 'website__domain')
|
||||||
|
readonly_fields = (
|
||||||
|
'id', 'created_at', 'started_at', 'completed_at',
|
||||||
|
'celery_task_id', 'raw_lighthouse_data', 'raw_zap_data',
|
||||||
|
'raw_playwright_data', 'raw_headers_data'
|
||||||
|
)
|
||||||
|
inlines = [IssueInline, MetricInline]
|
||||||
|
ordering = ('-created_at',)
|
||||||
|
|
||||||
|
fieldsets = (
|
||||||
|
('Basic Info', {
|
||||||
|
'fields': ('id', 'website', 'status', 'celery_task_id')
|
||||||
|
}),
|
||||||
|
('Timestamps', {
|
||||||
|
'fields': ('created_at', 'started_at', 'completed_at')
|
||||||
|
}),
|
||||||
|
('Scores', {
|
||||||
|
'fields': (
|
||||||
|
'overall_score', 'performance_score', 'accessibility_score',
|
||||||
|
'seo_score', 'best_practices_score', 'security_score'
|
||||||
|
)
|
||||||
|
}),
|
||||||
|
('Errors', {
|
||||||
|
'fields': ('error_message',),
|
||||||
|
'classes': ('collapse',)
|
||||||
|
}),
|
||||||
|
('Raw Data', {
|
||||||
|
'fields': (
|
||||||
|
'raw_lighthouse_data', 'raw_zap_data',
|
||||||
|
'raw_playwright_data', 'raw_headers_data'
|
||||||
|
),
|
||||||
|
'classes': ('collapse',)
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@admin.register(Issue)
|
||||||
|
class IssueAdmin(admin.ModelAdmin):
|
||||||
|
list_display = ('title', 'scan', 'category', 'severity', 'tool', 'created_at')
|
||||||
|
list_filter = ('category', 'severity', 'tool', 'created_at')
|
||||||
|
search_fields = ('title', 'description', 'scan__website__url')
|
||||||
|
readonly_fields = ('id', 'created_at', 'raw_data')
|
||||||
|
ordering = ('severity', '-created_at')
|
||||||
|
|
||||||
|
|
||||||
|
@admin.register(Metric)
|
||||||
|
class MetricAdmin(admin.ModelAdmin):
|
||||||
|
list_display = ('display_name', 'scan', 'value', 'unit', 'source', 'score')
|
||||||
|
list_filter = ('source', 'unit')
|
||||||
|
search_fields = ('name', 'display_name', 'scan__website__url')
|
||||||
|
readonly_fields = ('id', 'created_at')
|
||||||
|
ordering = ('name',)
@ -0,0 +1,11 @@
|
||||||
|
"""
|
||||||
|
Websites app configuration.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from django.apps import AppConfig
|
||||||
|
|
||||||
|
|
||||||
|
class WebsitesConfig(AppConfig):
|
||||||
|
default_auto_field = 'django.db.models.BigAutoField'
|
||||||
|
name = 'websites'
|
||||||
|
verbose_name = 'Website Scanner'
@ -0,0 +1,493 @@
|
||||||
|
"""
|
||||||
|
Database models for Website Analyzer.
|
||||||
|
|
||||||
|
This module defines the core data models for storing websites, scans,
|
||||||
|
issues, and metrics from various scanning tools.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
from django.db import models
|
||||||
|
from django.utils import timezone
|
||||||
|
from django.core.validators import URLValidator
|
||||||
|
|
||||||
|
|
||||||
|
class Website(models.Model):
|
||||||
|
"""
|
||||||
|
Represents a website that has been scanned.
|
||||||
|
|
||||||
|
Each unique URL gets one Website record, which can have multiple
|
||||||
|
Scan records associated with it.
|
||||||
|
"""
|
||||||
|
|
||||||
|
id = models.UUIDField(
|
||||||
|
primary_key=True,
|
||||||
|
default=uuid.uuid4,
|
||||||
|
editable=False,
|
||||||
|
help_text="Unique identifier for the website"
|
||||||
|
)
|
||||||
|
url = models.URLField(
|
||||||
|
max_length=2048,
|
||||||
|
unique=True,
|
||||||
|
validators=[URLValidator(schemes=['http', 'https'])],
|
||||||
|
help_text="The normalized URL of the website"
|
||||||
|
)
|
||||||
|
domain = models.CharField(
|
||||||
|
max_length=255,
|
||||||
|
db_index=True,
|
||||||
|
help_text="The domain extracted from the URL"
|
||||||
|
)
|
||||||
|
created_at = models.DateTimeField(
|
||||||
|
auto_now_add=True,
|
||||||
|
help_text="When the website was first added"
|
||||||
|
)
|
||||||
|
last_scanned_at = models.DateTimeField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="When the website was last scanned"
|
||||||
|
)
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
db_table = 'websites'
|
||||||
|
ordering = ['-created_at']
|
||||||
|
indexes = [
|
||||||
|
models.Index(fields=['domain']),
|
||||||
|
models.Index(fields=['-last_scanned_at']),
|
||||||
|
]
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return self.url
|
||||||
|
|
||||||
|
def save(self, *args, **kwargs):
|
||||||
|
"""Extract domain from URL before saving."""
|
||||||
|
if self.url:
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
parsed = urlparse(self.url)
|
||||||
|
self.domain = parsed.netloc.lower()
|
||||||
|
super().save(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class ScanStatus(models.TextChoices):
|
||||||
|
"""Enumeration of possible scan statuses."""
|
||||||
|
PENDING = 'pending', 'Pending'
|
||||||
|
RUNNING = 'running', 'Running'
|
||||||
|
DONE = 'done', 'Completed'
|
||||||
|
FAILED = 'failed', 'Failed'
|
||||||
|
PARTIAL = 'partial', 'Partially Completed'
|
||||||
|
|
||||||
|
|
||||||
|
class Scan(models.Model):
|
||||||
|
"""
|
||||||
|
Represents a single scan of a website.
|
||||||
|
|
||||||
|
Contains aggregated scores from various scanning tools and
|
||||||
|
links to detailed issues and metrics.
|
||||||
|
"""
|
||||||
|
|
||||||
|
id = models.UUIDField(
|
||||||
|
primary_key=True,
|
||||||
|
default=uuid.uuid4,
|
||||||
|
editable=False,
|
||||||
|
help_text="Unique identifier for the scan"
|
||||||
|
)
|
||||||
|
website = models.ForeignKey(
|
||||||
|
Website,
|
||||||
|
on_delete=models.CASCADE,
|
||||||
|
related_name='scans',
|
||||||
|
help_text="The website that was scanned"
|
||||||
|
)
|
||||||
|
status = models.CharField(
|
||||||
|
max_length=20,
|
||||||
|
choices=ScanStatus.choices,
|
||||||
|
default=ScanStatus.PENDING,
|
||||||
|
db_index=True,
|
||||||
|
help_text="Current status of the scan"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Celery task tracking
|
||||||
|
celery_task_id = models.CharField(
|
||||||
|
max_length=255,
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Celery task ID for tracking"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Timestamps
|
||||||
|
created_at = models.DateTimeField(
|
||||||
|
auto_now_add=True,
|
||||||
|
help_text="When the scan was created"
|
||||||
|
)
|
||||||
|
started_at = models.DateTimeField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="When the scan started running"
|
||||||
|
)
|
||||||
|
completed_at = models.DateTimeField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="When the scan completed"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Aggregated scores (0-100)
|
||||||
|
performance_score = models.IntegerField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Lighthouse performance score (0-100)"
|
||||||
|
)
|
||||||
|
accessibility_score = models.IntegerField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Lighthouse accessibility score (0-100)"
|
||||||
|
)
|
||||||
|
seo_score = models.IntegerField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Lighthouse SEO score (0-100)"
|
||||||
|
)
|
||||||
|
best_practices_score = models.IntegerField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Lighthouse best practices score (0-100)"
|
||||||
|
)
|
||||||
|
security_score = models.IntegerField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Computed security score based on issues (0-100)"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Overall health score (computed average)
|
||||||
|
overall_score = models.IntegerField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Overall health score (0-100)"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Error tracking
|
||||||
|
error_message = models.TextField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Error message if scan failed"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Raw data from scanners
|
||||||
|
raw_lighthouse_data = models.JSONField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Raw Lighthouse report data"
|
||||||
|
)
|
||||||
|
raw_zap_data = models.JSONField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Raw OWASP ZAP report data"
|
||||||
|
)
|
||||||
|
raw_playwright_data = models.JSONField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Raw Playwright analysis data"
|
||||||
|
)
|
||||||
|
raw_headers_data = models.JSONField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Raw HTTP headers analysis data"
|
||||||
|
)
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
db_table = 'scans'
|
||||||
|
ordering = ['-created_at']
|
||||||
|
indexes = [
|
||||||
|
models.Index(fields=['status']),
|
||||||
|
models.Index(fields=['-created_at']),
|
||||||
|
models.Index(fields=['website', '-created_at']),
|
||||||
|
]
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return f"Scan {self.id} - {self.website.url} ({self.status})"
|
||||||
|
|
||||||
|
def calculate_overall_score(self):
|
||||||
|
"""
|
||||||
|
Calculate overall health score as weighted average of all scores.
|
||||||
|
|
||||||
|
Weights:
|
||||||
|
- Performance: 25%
|
||||||
|
- Security: 30%
|
||||||
|
- Accessibility: 15%
|
||||||
|
- SEO: 15%
|
||||||
|
- Best Practices: 15%
|
||||||
|
"""
|
||||||
|
scores = [
|
||||||
|
(self.performance_score, 0.25),
|
||||||
|
(self.security_score, 0.30),
|
||||||
|
(self.accessibility_score, 0.15),
|
||||||
|
(self.seo_score, 0.15),
|
||||||
|
(self.best_practices_score, 0.15),
|
||||||
|
]
|
||||||
|
|
||||||
|
total_weight = 0
|
||||||
|
weighted_sum = 0
|
||||||
|
|
||||||
|
for score, weight in scores:
|
||||||
|
if score is not None:
|
||||||
|
weighted_sum += score * weight
|
||||||
|
total_weight += weight
|
||||||
|
|
||||||
|
if total_weight > 0:
|
||||||
|
self.overall_score = round(weighted_sum / total_weight)
|
||||||
|
else:
|
||||||
|
self.overall_score = None
|
||||||
|
|
||||||
|
return self.overall_score
|
||||||
|
|
||||||
|
def calculate_security_score(self):
|
||||||
|
"""
|
||||||
|
Calculate security score based on security issues found.
|
||||||
|
|
||||||
|
Starts at 100 and deducts points based on issue severity:
|
||||||
|
- Critical: -25 points each
|
||||||
|
- High: -15 points each
|
||||||
|
- Medium: -8 points each
|
||||||
|
- Low: -3 points each
|
||||||
|
- Info: -1 point each
|
||||||
|
"""
|
||||||
|
deductions = {
|
||||||
|
'critical': 25,
|
||||||
|
'high': 15,
|
||||||
|
'medium': 8,
|
||||||
|
'low': 3,
|
||||||
|
'info': 1,
|
||||||
|
}
|
||||||
|
|
||||||
|
score = 100
|
||||||
|
security_issues = self.issues.filter(
|
||||||
|
category__in=['security', 'headers', 'tls', 'cors']
|
||||||
|
)
|
||||||
|
|
||||||
|
for issue in security_issues:
|
||||||
|
score -= deductions.get(issue.severity, 0)
|
||||||
|
|
||||||
|
self.security_score = max(0, score)
|
||||||
|
return self.security_score
|
||||||
|
|
||||||
|
|
||||||
|
class IssueCategory(models.TextChoices):
|
||||||
|
"""Categories of issues that can be detected."""
|
||||||
|
PERFORMANCE = 'performance', 'Performance'
|
||||||
|
SECURITY = 'security', 'Security'
|
||||||
|
HEADERS = 'headers', 'HTTP Headers'
|
||||||
|
TLS = 'tls', 'TLS/SSL'
|
||||||
|
CORS = 'cors', 'CORS'
|
||||||
|
ACCESSIBILITY = 'accessibility', 'Accessibility'
|
||||||
|
SEO = 'seo', 'SEO'
|
||||||
|
BEST_PRACTICES = 'best_practices', 'Best Practices'
|
||||||
|
CONTENT = 'content', 'Content'
|
||||||
|
RESOURCES = 'resources', 'Resources'
|
||||||
|
|
||||||
|
|
||||||
|
class IssueSeverity(models.TextChoices):
|
||||||
|
"""Severity levels for issues."""
|
||||||
|
CRITICAL = 'critical', 'Critical'
|
||||||
|
HIGH = 'high', 'High'
|
||||||
|
MEDIUM = 'medium', 'Medium'
|
||||||
|
LOW = 'low', 'Low'
|
||||||
|
INFO = 'info', 'Informational'
|
||||||
|
|
||||||
|
|
||||||
|
class ScannerTool(models.TextChoices):
|
||||||
|
"""Scanner tools that can detect issues."""
|
||||||
|
LIGHTHOUSE = 'lighthouse', 'Google Lighthouse'
|
||||||
|
ZAP = 'owasp_zap', 'OWASP ZAP'
|
||||||
|
PLAYWRIGHT = 'playwright', 'Playwright'
|
||||||
|
HEADER_CHECK = 'header_check', 'HTTP Header Check'
|
||||||
|
TLS_CHECK = 'tls_check', 'TLS/SSL Check'
|
||||||
|
|
||||||
|
|
||||||
|
class Issue(models.Model):
|
||||||
|
"""
|
||||||
|
Represents a specific issue found during a scan.
|
||||||
|
|
||||||
|
Issues are categorized by type, severity, and the tool that detected them.
|
||||||
|
Each issue includes a description and suggested remediation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
id = models.UUIDField(
|
||||||
|
primary_key=True,
|
||||||
|
default=uuid.uuid4,
|
||||||
|
editable=False
|
||||||
|
)
|
||||||
|
scan = models.ForeignKey(
|
||||||
|
Scan,
|
||||||
|
on_delete=models.CASCADE,
|
||||||
|
related_name='issues',
|
||||||
|
help_text="The scan that found this issue"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Classification
|
||||||
|
category = models.CharField(
|
||||||
|
max_length=30,
|
||||||
|
choices=IssueCategory.choices,
|
||||||
|
db_index=True,
|
||||||
|
help_text="Category of the issue"
|
||||||
|
)
|
||||||
|
severity = models.CharField(
|
||||||
|
max_length=20,
|
||||||
|
choices=IssueSeverity.choices,
|
||||||
|
db_index=True,
|
||||||
|
help_text="Severity level of the issue"
|
||||||
|
)
|
||||||
|
tool = models.CharField(
|
||||||
|
max_length=30,
|
||||||
|
choices=ScannerTool.choices,
|
||||||
|
help_text="Tool that detected this issue"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Issue details
|
||||||
|
title = models.CharField(
|
||||||
|
max_length=500,
|
||||||
|
help_text="Brief title of the issue"
|
||||||
|
)
|
||||||
|
description = models.TextField(
|
||||||
|
help_text="Detailed description of the issue"
|
||||||
|
)
|
||||||
|
affected_url = models.URLField(
|
||||||
|
max_length=2048,
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Specific URL affected by this issue"
|
||||||
|
)
|
||||||
|
remediation = models.TextField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Suggested fix or remediation"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Additional data from scanner
|
||||||
|
raw_data = models.JSONField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Raw data from the scanner for this issue"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Timestamps
|
||||||
|
created_at = models.DateTimeField(
|
||||||
|
auto_now_add=True
|
||||||
|
)
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
db_table = 'issues'
|
||||||
|
ordering = ['severity', '-created_at']
|
||||||
|
indexes = [
|
||||||
|
models.Index(fields=['scan', 'category']),
|
||||||
|
models.Index(fields=['scan', 'severity']),
|
||||||
|
models.Index(fields=['tool']),
|
||||||
|
]
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return f"[{self.severity}] {self.title}"
|
||||||
|
|
||||||
|
|
||||||
|
class MetricUnit(models.TextChoices):
|
||||||
|
"""Units of measurement for metrics."""
|
||||||
|
MILLISECONDS = 'ms', 'Milliseconds'
|
||||||
|
SECONDS = 's', 'Seconds'
|
||||||
|
BYTES = 'bytes', 'Bytes'
|
||||||
|
KILOBYTES = 'kb', 'Kilobytes'
|
||||||
|
MEGABYTES = 'mb', 'Megabytes'
|
||||||
|
SCORE = 'score', 'Score (0-1)'
|
||||||
|
PERCENT = 'percent', 'Percentage'
|
||||||
|
COUNT = 'count', 'Count'
|
||||||
|
|
||||||
|
|
||||||
|
class Metric(models.Model):
|
||||||
|
"""
|
||||||
|
Represents a specific metric measured during a scan.
|
||||||
|
|
||||||
|
Metrics are numerical values with units, such as page load time,
|
||||||
|
total byte weight, number of requests, etc.
|
||||||
|
"""
|
||||||
|
|
||||||
|
id = models.UUIDField(
|
||||||
|
primary_key=True,
|
||||||
|
default=uuid.uuid4,
|
||||||
|
editable=False
|
||||||
|
)
|
||||||
|
scan = models.ForeignKey(
|
||||||
|
Scan,
|
||||||
|
on_delete=models.CASCADE,
|
||||||
|
related_name='metrics',
|
||||||
|
help_text="The scan that measured this metric"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Metric identification
|
||||||
|
name = models.CharField(
|
||||||
|
max_length=100,
|
||||||
|
db_index=True,
|
||||||
|
help_text="Name of the metric (e.g., 'first_contentful_paint_ms')"
|
||||||
|
)
|
||||||
|
display_name = models.CharField(
|
||||||
|
max_length=200,
|
||||||
|
help_text="Human-readable name for display"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Value
|
||||||
|
value = models.FloatField(
|
||||||
|
help_text="Numeric value of the metric"
|
||||||
|
)
|
||||||
|
unit = models.CharField(
|
||||||
|
max_length=20,
|
||||||
|
choices=MetricUnit.choices,
|
||||||
|
help_text="Unit of measurement"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Source
|
||||||
|
source = models.CharField(
|
||||||
|
max_length=30,
|
||||||
|
choices=ScannerTool.choices,
|
||||||
|
help_text="Tool that provided this metric"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Score (if applicable)
|
||||||
|
score = models.FloatField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Lighthouse score for this metric (0-1)"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Timestamp
|
||||||
|
created_at = models.DateTimeField(
|
||||||
|
auto_now_add=True
|
||||||
|
)
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
db_table = 'metrics'
|
||||||
|
ordering = ['name']
|
||||||
|
indexes = [
|
||||||
|
models.Index(fields=['scan', 'name']),
|
||||||
|
models.Index(fields=['source']),
|
||||||
|
]
|
||||||
|
# Ensure unique metric names per scan
|
||||||
|
constraints = [
|
||||||
|
models.UniqueConstraint(
|
||||||
|
fields=['scan', 'name'],
|
||||||
|
name='unique_metric_per_scan'
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return f"{self.display_name}: {self.value} {self.unit}"
|
||||||
|
|
||||||
|
def get_formatted_value(self):
|
||||||
|
"""Return a formatted string representation of the value."""
|
||||||
|
if self.unit == MetricUnit.MILLISECONDS:
|
||||||
|
if self.value >= 1000:
|
||||||
|
return f"{self.value / 1000:.2f}s"
|
||||||
|
return f"{self.value:.0f}ms"
|
||||||
|
elif self.unit == MetricUnit.BYTES:
|
||||||
|
if self.value >= 1024 * 1024:
|
||||||
|
return f"{self.value / (1024 * 1024):.2f} MB"
|
||||||
|
elif self.value >= 1024:
|
||||||
|
return f"{self.value / 1024:.1f} KB"
|
||||||
|
return f"{self.value:.0f} bytes"
|
||||||
|
elif self.unit == MetricUnit.PERCENT:
|
||||||
|
return f"{self.value:.1f}%"
|
||||||
|
elif self.unit == MetricUnit.SCORE:
|
||||||
|
return f"{self.value:.3f}"
|
||||||
|
else:
|
||||||
|
return f"{self.value:.2f} {self.get_unit_display()}"
@ -0,0 +1,160 @@
# Website Analyzer - Docker Compose Configuration
# This file orchestrates all services required for the application

version: '3.9'

services:
  # ==========================================================================
  # PostgreSQL Database
  # ==========================================================================
  db:
    image: postgres:16-alpine
    container_name: analyzer_db
    restart: unless-stopped
    environment:
      POSTGRES_USER: analyzer
      POSTGRES_PASSWORD: analyzer_password
      POSTGRES_DB: website_analyzer
    volumes:
      - postgres_data:/var/lib/postgresql/data
    ports:
      - "5432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U analyzer -d website_analyzer"]
      interval: 10s
      timeout: 5s
      retries: 5

  # ==========================================================================
  # Redis - Message Broker & Cache
  # ==========================================================================
  redis:
    image: redis:7-alpine
    container_name: analyzer_redis
    restart: unless-stopped
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5

  # ==========================================================================
  # Django Web Application
  # ==========================================================================
  web:
    build:
      context: ./backend
      dockerfile: Dockerfile
    container_name: analyzer_web
    restart: unless-stopped
    command: >
      sh -c "python manage.py migrate &&
             python manage.py collectstatic --noinput &&
             gunicorn core.wsgi:application --bind 0.0.0.0:8000 --workers 4 --threads 2"
    volumes:
      - ./backend:/app
      - static_volume:/app/staticfiles
    ports:
      - "8000:8000"
    env_file:
      - ./backend/.env
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_healthy
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/api/health/"]
      interval: 30s
      timeout: 10s
      retries: 3
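
  # Note: the ./backend bind mount layers the local source tree over /app, so
  # code edits show up in the running container without a rebuild (handy in
  # development; a production setup would usually drop the bind mount and run
  # the code baked into the image). The command chain also means the container
  # only starts serving, and therefore only passes its healthcheck, once
  # migrate and collectstatic have finished.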

  # ==========================================================================
  # Celery Worker - Background Task Processing
  # ==========================================================================
  celery_worker:
    build:
      context: ./backend
      dockerfile: Dockerfile
    container_name: analyzer_celery_worker
    restart: unless-stopped
    command: celery -A core worker -l INFO --concurrency=2
    volumes:
      - ./backend:/app
    env_file:
      - ./backend/.env
    depends_on:
      - db
      - redis
      - web

  # ==========================================================================
  # Celery Beat - Scheduled Tasks (Optional)
  # ==========================================================================
  celery_beat:
    build:
      context: ./backend
      dockerfile: Dockerfile
    container_name: analyzer_celery_beat
    restart: unless-stopped
    command: celery -A core beat -l INFO
    volumes:
      - ./backend:/app
    env_file:
      - ./backend/.env
    depends_on:
      - db
      - redis
      - celery_worker

  # ==========================================================================
  # OWASP ZAP - Security Scanner
  # ==========================================================================
  zap:
    image: ghcr.io/zaproxy/zaproxy:stable
    container_name: analyzer_zap
    restart: unless-stopped
    command: zap.sh -daemon -host 0.0.0.0 -port 8080 -config api.key=zap-api-key-change-me -config api.addrs.addr.name=.* -config api.addrs.addr.regex=true
    ports:
      - "8081:8080"
    volumes:
      - zap_data:/home/zap/.ZAP
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/JSON/core/view/version/?apikey=zap-api-key-change-me"]
      interval: 30s
      timeout: 10s
      retries: 5
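
  # Note: the ZAP API key appears twice above (in the daemon command and in the
  # healthcheck URL); if it is rotated, both occurrences, plus whatever value
  # the Django side uses when calling the ZAP API, have to change together, so
  # sourcing it from an environment variable is the safer long-term setup.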

  # ==========================================================================
  # Lighthouse Scanner Service (Node.js)
  # ==========================================================================
  lighthouse:
    build:
      context: ./lighthouse
      dockerfile: Dockerfile
    container_name: analyzer_lighthouse
    restart: unless-stopped
    ports:
      - "3001:3001"
    volumes:
      - lighthouse_reports:/app/reports
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3001/health"]
      interval: 30s
      timeout: 10s
      retries: 3

volumes:
  postgres_data:
  redis_data:
  static_volume:
  zap_data:
  lighthouse_reports:

networks:
  default:
    name: analyzer_network
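
# A typical way to bring the whole stack up (assuming this file sits at the
# repository root next to the backend/ and lighthouse/ directories it
# references):
#
#   docker compose up -d --build
#   docker compose ps          # wait until db, redis, web, zap and lighthouse report "healthy"
#   docker compose logs -f web # follow the Django container's output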
@ -0,0 +1,54 @@
# Lighthouse Scanner Service - Dockerfile
# Node.js service that runs Lighthouse CLI and provides HTTP API

FROM node:20-slim

# Install Chrome dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    chromium \
    fonts-liberation \
    libappindicator3-1 \
    libasound2 \
    libatk-bridge2.0-0 \
    libatk1.0-0 \
    libcups2 \
    libdbus-1-3 \
    libdrm2 \
    libgbm1 \
    libgtk-3-0 \
    libnspr4 \
    libnss3 \
    libxcomposite1 \
    libxdamage1 \
    libxfixes3 \
    libxkbcommon0 \
    libxrandr2 \
    xdg-utils \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Set Chrome path for Lighthouse
ENV CHROME_PATH=/usr/bin/chromium

WORKDIR /app

# Copy package files
COPY package*.json ./

# Install dependencies
RUN npm ci --only=production

# Copy application code
COPY . .

# Create reports directory
RUN mkdir -p reports

# Create non-root user
RUN useradd -m -u 1000 lighthouse && \
    chown -R lighthouse:lighthouse /app
USER lighthouse

EXPOSE 3001

CMD ["node", "server.js"]
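
# Building and running this image on its own, outside docker-compose, might
# look like the following (the image tag is arbitrary):
#
#   docker build -t lighthouse-scanner ./lighthouse
#   docker run --rm -p 3001:3001 lighthouse-scanner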
@ -0,0 +1,19 @@
{
  "name": "lighthouse-scanner",
  "version": "1.0.0",
  "description": "Lighthouse scanner service for Website Analyzer",
  "main": "server.js",
  "scripts": {
    "start": "node server.js",
    "dev": "node --watch server.js"
  },
  "dependencies": {
    "express": "^4.18.2",
    "lighthouse": "^11.4.0",
    "chrome-launcher": "^1.1.0",
    "uuid": "^9.0.0"
  },
  "engines": {
    "node": ">=18.0.0"
  }
}
@ -0,0 +1,328 @@
/**
 * Lighthouse Scanner Service
 *
 * This service provides an HTTP API for running Lighthouse audits.
 * It's designed to be called from the Django backend via Celery tasks.
 */

const express = require('express');
const { v4: uuidv4 } = require('uuid');
const fs = require('fs').promises;
const path = require('path');

// lighthouse v10+ is published as an ES module (and recent chrome-launcher
// releases are as well), so neither can be pulled in with require() from this
// CommonJS file; both are loaded with a dynamic import() inside the /scan handler.

const app = express();
app.use(express.json());

const PORT = process.env.PORT || 3001;
const REPORTS_DIR = path.join(__dirname, 'reports');

// Ensure reports directory exists
fs.mkdir(REPORTS_DIR, { recursive: true }).catch(console.error);

/**
 * Health check endpoint
 */
app.get('/health', (req, res) => {
  res.json({ status: 'healthy', service: 'lighthouse-scanner' });
});

/**
 * Run Lighthouse audit for a given URL
 *
 * POST /scan
 * Body: { "url": "https://example.com" }
 *
 * Returns: Lighthouse audit results as JSON
 */
app.post('/scan', async (req, res) => {
  const { url } = req.body;

  if (!url) {
    return res.status(400).json({ error: 'URL is required' });
  }

  // Validate URL format
  try {
    new URL(url);
  } catch (e) {
    return res.status(400).json({ error: 'Invalid URL format' });
  }

  const scanId = uuidv4();
  console.log(`[${scanId}] Starting Lighthouse scan for: ${url}`);

  let chrome = null;

  try {
    // lighthouse and chrome-launcher are ESM packages, so load them here via
    // dynamic import (Node caches the modules after the first call)
    const { default: lighthouse } = await import('lighthouse');
    const chromeLauncher = await import('chrome-launcher');

    // Launch Chrome
    chrome = await chromeLauncher.launch({
      chromeFlags: [
        '--headless',
        '--disable-gpu',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-extensions',
        '--disable-background-networking',
        '--disable-sync',
        '--disable-translate',
        '--metrics-recording-only',
        '--mute-audio',
        '--no-first-run',
        '--safebrowsing-disable-auto-update'
      ]
    });

    console.log(`[${scanId}] Chrome launched on port ${chrome.port}`);

    // Lighthouse configuration
    const options = {
      logLevel: 'error',
      output: 'json',
      port: chrome.port,
      onlyCategories: ['performance', 'accessibility', 'best-practices', 'seo'],
      // Throttling settings for more realistic results
      throttling: {
        cpuSlowdownMultiplier: 4,
        downloadThroughputKbps: 1638.4,
        uploadThroughputKbps: 675,
        rttMs: 150
      },
      screenEmulation: {
        mobile: false,
        width: 1920,
        height: 1080,
        deviceScaleFactor: 1,
        disabled: false
      },
      formFactor: 'desktop'
    };

    // Run Lighthouse
    const runnerResult = await lighthouse(url, options);

    // Extract the report
    const report = runnerResult.lhr;

    // Process and extract key metrics
    const result = {
      scanId,
      url: report.finalUrl || url,
      fetchTime: report.fetchTime,

      // Category scores (0-100)
      scores: {
        performance: Math.round((report.categories.performance?.score || 0) * 100),
        accessibility: Math.round((report.categories.accessibility?.score || 0) * 100),
        bestPractices: Math.round((report.categories['best-practices']?.score || 0) * 100),
        seo: Math.round((report.categories.seo?.score || 0) * 100)
      },

      // Core Web Vitals and key metrics
      metrics: {
        firstContentfulPaint: {
          value: report.audits['first-contentful-paint']?.numericValue || null,
          unit: 'ms',
          score: report.audits['first-contentful-paint']?.score || null
        },
        largestContentfulPaint: {
          value: report.audits['largest-contentful-paint']?.numericValue || null,
          unit: 'ms',
          score: report.audits['largest-contentful-paint']?.score || null
        },
        speedIndex: {
          value: report.audits['speed-index']?.numericValue || null,
          unit: 'ms',
          score: report.audits['speed-index']?.score || null
        },
        timeToInteractive: {
          value: report.audits['interactive']?.numericValue || null,
          unit: 'ms',
          score: report.audits['interactive']?.score || null
        },
        totalBlockingTime: {
          value: report.audits['total-blocking-time']?.numericValue || null,
          unit: 'ms',
          score: report.audits['total-blocking-time']?.score || null
        },
        cumulativeLayoutShift: {
          value: report.audits['cumulative-layout-shift']?.numericValue || null,
          unit: 'score',
          score: report.audits['cumulative-layout-shift']?.score || null
        }
      },

      // JavaScript and resource audits
      resources: {
        totalByteWeight: report.audits['total-byte-weight']?.numericValue || null,
        bootupTime: report.audits['bootup-time']?.numericValue || null,
        mainThreadWork: report.audits['mainthread-work-breakdown']?.numericValue || null,

        // Unused resources
        unusedJavascript: extractUnusedResources(report.audits['unused-javascript']),
        unusedCss: extractUnusedResources(report.audits['unused-css-rules']),

        // Render blocking resources
        renderBlockingResources: extractRenderBlockingResources(report.audits['render-blocking-resources']),

        // Large bundles
        scriptTreemap: extractLargeScripts(report.audits['script-treemap-data']),

        // Third party usage
        thirdPartySummary: extractThirdPartySummary(report.audits['third-party-summary'])
      },

      // Diagnostics
      diagnostics: {
        numRequests: report.audits['network-requests']?.details?.items?.length || 0,
        numScripts: countResourcesByType(report.audits['network-requests'], 'Script'),
        numStylesheets: countResourcesByType(report.audits['network-requests'], 'Stylesheet'),
        numImages: countResourcesByType(report.audits['network-requests'], 'Image'),
        numFonts: countResourcesByType(report.audits['network-requests'], 'Font'),
        totalTransferSize: report.audits['total-byte-weight']?.numericValue || 0
      },

      // Failed audits (potential issues)
      issues: extractFailedAudits(report)
    };

    // Save full report to file for debugging
    const reportPath = path.join(REPORTS_DIR, `${scanId}.json`);
    await fs.writeFile(reportPath, JSON.stringify(report, null, 2));

    console.log(`[${scanId}] Scan completed successfully`);
    res.json(result);

  } catch (error) {
    console.error(`[${scanId}] Scan failed:`, error);
    res.status(500).json({
      error: 'Lighthouse scan failed',
      message: error.message,
      scanId
    });
  } finally {
    if (chrome) {
      await chrome.kill();
    }
  }
});
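
// Example interaction with the endpoint above (illustrative only; the numbers
// naturally vary between runs):
//
//   curl -X POST http://localhost:3001/scan \
//        -H 'Content-Type: application/json' \
//        -d '{"url": "https://example.com"}'
//
// responds with a JSON body shaped like:
//
//   {
//     "scanId": "<uuid>",
//     "url": "https://example.com/",
//     "scores": { "performance": 97, "accessibility": 100, "bestPractices": 96, "seo": 100 },
//     "metrics": { "firstContentfulPaint": { "value": 812.4, "unit": "ms", "score": 0.99 }, ... },
//     "resources": { ... },
//     "diagnostics": { ... },
//     "issues": [ ... ]
//   }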

/**
 * Get a saved report by ID
 */
app.get('/report/:scanId', async (req, res) => {
  const { scanId } = req.params;
  const reportPath = path.join(REPORTS_DIR, `${scanId}.json`);

  try {
    const report = await fs.readFile(reportPath, 'utf8');
    res.json(JSON.parse(report));
  } catch (error) {
    res.status(404).json({ error: 'Report not found' });
  }
});

// =============================================================================
// Helper Functions
// =============================================================================

function extractUnusedResources(audit) {
  if (!audit?.details?.items) return [];

  return audit.details.items.slice(0, 10).map(item => ({
    url: item.url,
    totalBytes: item.totalBytes,
    wastedBytes: item.wastedBytes,
    wastedPercent: item.wastedPercent
  }));
}

function extractRenderBlockingResources(audit) {
  if (!audit?.details?.items) return [];

  return audit.details.items.map(item => ({
    url: item.url,
    wastedMs: item.wastedMs,
    totalBytes: item.totalBytes
  }));
}

function extractLargeScripts(audit) {
  if (!audit?.details?.nodes) return [];

  // Get scripts larger than 100KB
  const largeScripts = [];
  const processNode = (node, path = '') => {
    const currentPath = path ? `${path}/${node.name}` : node.name;

    if (node.resourceBytes > 100 * 1024) {
      largeScripts.push({
        name: currentPath,
        resourceBytes: node.resourceBytes,
        unusedBytes: node.unusedBytes || 0
      });
    }

    if (node.children) {
      node.children.forEach(child => processNode(child, currentPath));
    }
  };

  audit.details.nodes.forEach(node => processNode(node));
  return largeScripts.slice(0, 20);
}

function extractThirdPartySummary(audit) {
  if (!audit?.details?.items) return [];

  return audit.details.items.slice(0, 10).map(item => ({
    entity: item.entity,
    transferSize: item.transferSize,
    blockingTime: item.blockingTime,
    mainThreadTime: item.mainThreadTime
  }));
}

function countResourcesByType(audit, type) {
  if (!audit?.details?.items) return 0;
  return audit.details.items.filter(item => item.resourceType === type).length;
}

function extractFailedAudits(report) {
  const issues = [];

  const categoriesToCheck = ['performance', 'accessibility', 'best-practices', 'seo'];

  categoriesToCheck.forEach(categoryId => {
    const category = report.categories[categoryId];
    if (!category?.auditRefs) return;

    category.auditRefs.forEach(ref => {
      const audit = report.audits[ref.id];

      // Include audits with score < 0.5 (50%)
      if (audit && audit.score !== null && audit.score < 0.5) {
        issues.push({
          id: audit.id,
          category: categoryId,
          title: audit.title,
          description: audit.description,
          score: audit.score,
          displayValue: audit.displayValue,
          impact: ref.weight || 0
        });
      }
    });
  });

  // Sort by impact (weight) descending
  issues.sort((a, b) => b.impact - a.impact);

  return issues.slice(0, 30);
}

// Start the server
app.listen(PORT, '0.0.0.0', () => {
  console.log(`Lighthouse Scanner Service running on port ${PORT}`);
});
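
// Design note: every POST /scan launches and tears down its own Chromium
// instance, so each concurrent request costs a full browser; keeping the
// number of simultaneous scans bounded is left to the caller (in this stack,
// presumably the Django/Celery side).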