secure-web/backend/websites/models.py

"""
Database models for Website Analyzer.
This module defines the core data models for storing websites, scans,
issues, and metrics from various scanning tools.
"""
import uuid
from django.db import models
from django.utils import timezone
from django.core.validators import URLValidator


class Website(models.Model):
    """
    Represents a website that has been scanned.

    Each unique URL gets one Website record, which can have multiple
    Scan records associated with it.
    """
    id = models.UUIDField(
        primary_key=True,
        default=uuid.uuid4,
        editable=False,
        help_text="Unique identifier for the website"
    )
    url = models.URLField(
        max_length=2048,
        unique=True,
        validators=[URLValidator(schemes=['http', 'https'])],
        help_text="The normalized URL of the website"
    )
    domain = models.CharField(
        max_length=255,
        db_index=True,
        help_text="The domain extracted from the URL"
    )
    created_at = models.DateTimeField(
        auto_now_add=True,
        help_text="When the website was first added"
    )
    last_scanned_at = models.DateTimeField(
        null=True,
        blank=True,
        help_text="When the website was last scanned"
    )

    class Meta:
        db_table = 'websites'
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['domain']),
            models.Index(fields=['-last_scanned_at']),
        ]

    def __str__(self):
        return self.url

    def save(self, *args, **kwargs):
        """Extract domain from URL before saving."""
        if self.url:
            from urllib.parse import urlparse
            parsed = urlparse(self.url)
            self.domain = parsed.netloc.lower()
        super().save(*args, **kwargs)
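

# Illustrative sketch (hypothetical values, not part of the model definitions):
# saving Website(url="https://Example.COM/some/path") would store
# domain == "example.com", since save() lowercases urlparse(url).netloc
# before the row is written.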


class ScanStatus(models.TextChoices):
    """Enumeration of possible scan statuses."""
    PENDING = 'pending', 'Pending'
    RUNNING = 'running', 'Running'
    DONE = 'done', 'Completed'
    FAILED = 'failed', 'Failed'
    PARTIAL = 'partial', 'Partially Completed'


class Scan(models.Model):
    """
    Represents a single scan of a website.

    Contains aggregated scores from various scanning tools and
    links to detailed issues and metrics.
    """
    id = models.UUIDField(
        primary_key=True,
        default=uuid.uuid4,
        editable=False,
        help_text="Unique identifier for the scan"
    )
    website = models.ForeignKey(
        Website,
        on_delete=models.CASCADE,
        related_name='scans',
        help_text="The website that was scanned"
    )
    status = models.CharField(
        max_length=20,
        choices=ScanStatus.choices,
        default=ScanStatus.PENDING,
        db_index=True,
        help_text="Current status of the scan"
    )

    # Celery task tracking
    celery_task_id = models.CharField(
        max_length=255,
        null=True,
        blank=True,
        help_text="Celery task ID for tracking"
    )

    # Timestamps
    created_at = models.DateTimeField(
        auto_now_add=True,
        help_text="When the scan was created"
    )
    started_at = models.DateTimeField(
        null=True,
        blank=True,
        help_text="When the scan started running"
    )
    completed_at = models.DateTimeField(
        null=True,
        blank=True,
        help_text="When the scan completed"
    )

    # Aggregated scores (0-100)
    performance_score = models.IntegerField(
        null=True,
        blank=True,
        help_text="Lighthouse performance score (0-100)"
    )
    accessibility_score = models.IntegerField(
        null=True,
        blank=True,
        help_text="Lighthouse accessibility score (0-100)"
    )
    seo_score = models.IntegerField(
        null=True,
        blank=True,
        help_text="Lighthouse SEO score (0-100)"
    )
    best_practices_score = models.IntegerField(
        null=True,
        blank=True,
        help_text="Lighthouse best practices score (0-100)"
    )
    security_score = models.IntegerField(
        null=True,
        blank=True,
        help_text="Computed security score based on issues (0-100)"
    )

    # Overall health score (computed average)
    overall_score = models.IntegerField(
        null=True,
        blank=True,
        help_text="Overall health score (0-100)"
    )

    # Error tracking
    error_message = models.TextField(
        null=True,
        blank=True,
        help_text="Error message if scan failed"
    )

    # Raw data from scanners
    raw_lighthouse_data = models.JSONField(
        null=True,
        blank=True,
        help_text="Raw Lighthouse report data"
    )
    raw_zap_data = models.JSONField(
        null=True,
        blank=True,
        help_text="Raw OWASP ZAP report data"
    )
    raw_playwright_data = models.JSONField(
        null=True,
        blank=True,
        help_text="Raw Playwright analysis data"
    )
    raw_headers_data = models.JSONField(
        null=True,
        blank=True,
        help_text="Raw HTTP headers analysis data"
    )

    class Meta:
        db_table = 'scans'
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['status']),
            models.Index(fields=['-created_at']),
            models.Index(fields=['website', '-created_at']),
        ]

    def __str__(self):
        return f"Scan {self.id} - {self.website.url} ({self.status})"

    def calculate_overall_score(self):
        """
        Calculate overall health score as weighted average of all scores.

        Weights:
        - Performance: 25%
        - Security: 30%
        - Accessibility: 15%
        - SEO: 15%
        - Best Practices: 15%
        """
        scores = [
            (self.performance_score, 0.25),
            (self.security_score, 0.30),
            (self.accessibility_score, 0.15),
            (self.seo_score, 0.15),
            (self.best_practices_score, 0.15),
        ]
        total_weight = 0
        weighted_sum = 0
        for score, weight in scores:
            if score is not None:
                weighted_sum += score * weight
                total_weight += weight
        if total_weight > 0:
            self.overall_score = round(weighted_sum / total_weight)
        else:
            self.overall_score = None
        return self.overall_score
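
    # Worked example (hypothetical scores, for illustration only): with
    # performance=80, security=60, accessibility=90, best_practices=70 and
    # seo=None, the missing SEO weight is dropped from the average, so
    #   weighted_sum = 80*0.25 + 60*0.30 + 90*0.15 + 70*0.15 = 62.0
    #   total_weight = 0.85
    #   overall_score = round(62.0 / 0.85) = 73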

    def calculate_security_score(self):
        """
        Calculate security score based on security issues found.

        Starts at 100 and deducts points based on issue severity:
        - Critical: -25 points each
        - High: -15 points each
        - Medium: -8 points each
        - Low: -3 points each
        - Info: -1 point each
        """
        deductions = {
            'critical': 25,
            'high': 15,
            'medium': 8,
            'low': 3,
            'info': 1,
        }
        score = 100
        security_issues = self.issues.filter(
            category__in=['security', 'headers', 'tls', 'cors']
        )
        for issue in security_issues:
            score -= deductions.get(issue.severity, 0)
        self.security_score = max(0, score)
        return self.security_score
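
    # Worked example (hypothetical issue counts, for illustration only): a scan
    # whose security/headers/tls/cors issues are 1 critical, 2 medium and 3 info
    # is deducted 25 + 2*8 + 3*1 = 44 points, giving security_score == 56.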


class IssueCategory(models.TextChoices):
    """Categories of issues that can be detected."""
    PERFORMANCE = 'performance', 'Performance'
    SECURITY = 'security', 'Security'
    HEADERS = 'headers', 'HTTP Headers'
    TLS = 'tls', 'TLS/SSL'
    CORS = 'cors', 'CORS'
    ACCESSIBILITY = 'accessibility', 'Accessibility'
    SEO = 'seo', 'SEO'
    BEST_PRACTICES = 'best_practices', 'Best Practices'
    CONTENT = 'content', 'Content'
    RESOURCES = 'resources', 'Resources'


class IssueSeverity(models.TextChoices):
    """Severity levels for issues."""
    CRITICAL = 'critical', 'Critical'
    HIGH = 'high', 'High'
    MEDIUM = 'medium', 'Medium'
    LOW = 'low', 'Low'
    INFO = 'info', 'Informational'


class ScannerTool(models.TextChoices):
    """Scanner tools that can detect issues."""
    LIGHTHOUSE = 'lighthouse', 'Google Lighthouse'
    ZAP = 'owasp_zap', 'OWASP ZAP'
    PLAYWRIGHT = 'playwright', 'Playwright'
    HEADER_CHECK = 'header_check', 'HTTP Header Check'
    TLS_CHECK = 'tls_check', 'TLS/SSL Check'


class Issue(models.Model):
    """
    Represents a specific issue found during a scan.

    Issues are categorized by type, severity, and the tool that detected them.
    Each issue includes a description and suggested remediation.
    """
    id = models.UUIDField(
        primary_key=True,
        default=uuid.uuid4,
        editable=False
    )
    scan = models.ForeignKey(
        Scan,
        on_delete=models.CASCADE,
        related_name='issues',
        help_text="The scan that found this issue"
    )

    # Classification
    category = models.CharField(
        max_length=30,
        choices=IssueCategory.choices,
        db_index=True,
        help_text="Category of the issue"
    )
    severity = models.CharField(
        max_length=20,
        choices=IssueSeverity.choices,
        db_index=True,
        help_text="Severity level of the issue"
    )
    tool = models.CharField(
        max_length=30,
        choices=ScannerTool.choices,
        help_text="Tool that detected this issue"
    )

    # Issue details
    title = models.CharField(
        max_length=500,
        help_text="Brief title of the issue"
    )
    description = models.TextField(
        help_text="Detailed description of the issue"
    )
    affected_url = models.URLField(
        max_length=2048,
        null=True,
        blank=True,
        help_text="Specific URL affected by this issue"
    )
    remediation = models.TextField(
        null=True,
        blank=True,
        help_text="Suggested fix or remediation"
    )

    # Additional data from scanner
    raw_data = models.JSONField(
        null=True,
        blank=True,
        help_text="Raw data from the scanner for this issue"
    )

    # Timestamps
    created_at = models.DateTimeField(
        auto_now_add=True
    )

    class Meta:
        db_table = 'issues'
        ordering = ['severity', '-created_at']
        indexes = [
            models.Index(fields=['scan', 'category']),
            models.Index(fields=['scan', 'severity']),
            models.Index(fields=['tool']),
        ]

    def __str__(self):
        return f"[{self.severity}] {self.title}"


class MetricUnit(models.TextChoices):
    """Units of measurement for metrics."""
    MILLISECONDS = 'ms', 'Milliseconds'
    SECONDS = 's', 'Seconds'
    BYTES = 'bytes', 'Bytes'
    KILOBYTES = 'kb', 'Kilobytes'
    MEGABYTES = 'mb', 'Megabytes'
    SCORE = 'score', 'Score (0-1)'
    PERCENT = 'percent', 'Percentage'
    COUNT = 'count', 'Count'


class Metric(models.Model):
    """
    Represents a specific metric measured during a scan.

    Metrics are numerical values with units, such as page load time,
    total byte weight, number of requests, etc.
    """
    id = models.UUIDField(
        primary_key=True,
        default=uuid.uuid4,
        editable=False
    )
    scan = models.ForeignKey(
        Scan,
        on_delete=models.CASCADE,
        related_name='metrics',
        help_text="The scan that measured this metric"
    )

    # Metric identification
    name = models.CharField(
        max_length=100,
        db_index=True,
        help_text="Name of the metric (e.g., 'first_contentful_paint_ms')"
    )
    display_name = models.CharField(
        max_length=200,
        help_text="Human-readable name for display"
    )

    # Value
    value = models.FloatField(
        help_text="Numeric value of the metric"
    )
    unit = models.CharField(
        max_length=20,
        choices=MetricUnit.choices,
        help_text="Unit of measurement"
    )

    # Source
    source = models.CharField(
        max_length=30,
        choices=ScannerTool.choices,
        help_text="Tool that provided this metric"
    )

    # Score (if applicable)
    score = models.FloatField(
        null=True,
        blank=True,
        help_text="Lighthouse score for this metric (0-1)"
    )

    # Timestamp
    created_at = models.DateTimeField(
        auto_now_add=True
    )

    class Meta:
        db_table = 'metrics'
        ordering = ['name']
        indexes = [
            models.Index(fields=['scan', 'name']),
            models.Index(fields=['source']),
        ]
        # Ensure unique metric names per scan
        constraints = [
            models.UniqueConstraint(
                fields=['scan', 'name'],
                name='unique_metric_per_scan'
            )
        ]

    def __str__(self):
        return f"{self.display_name}: {self.value} {self.unit}"

    def get_formatted_value(self):
        """Return a formatted string representation of the value."""
        if self.unit == MetricUnit.MILLISECONDS:
            if self.value >= 1000:
                return f"{self.value / 1000:.2f}s"
            return f"{self.value:.0f}ms"
        elif self.unit == MetricUnit.BYTES:
            if self.value >= 1024 * 1024:
                return f"{self.value / (1024 * 1024):.2f} MB"
            elif self.value >= 1024:
                return f"{self.value / 1024:.1f} KB"
            return f"{self.value:.0f} bytes"
        elif self.unit == MetricUnit.PERCENT:
            return f"{self.value:.1f}%"
        elif self.unit == MetricUnit.SCORE:
            return f"{self.value:.3f}"
        else:
            return f"{self.value:.2f} {self.get_unit_display()}"