"""
Database models for Website Analyzer.

This module defines the core data models for storing websites, scans,
issues, and metrics from various scanning tools.
"""

import uuid

from django.db import models
from django.utils import timezone
from django.core.validators import URLValidator


class Website(models.Model):
    """
    Represents a website that has been scanned.

    Each unique URL gets one Website record, which can have multiple
    Scan records associated with it.
    """

    id = models.UUIDField(
        primary_key=True,
        default=uuid.uuid4,
        editable=False,
        help_text="Unique identifier for the website"
    )
    url = models.URLField(
        max_length=2048,
        unique=True,
        validators=[URLValidator(schemes=['http', 'https'])],
        help_text="The normalized URL of the website"
    )
    domain = models.CharField(
        max_length=255,
        db_index=True,
        help_text="The domain extracted from the URL"
    )
    created_at = models.DateTimeField(
        auto_now_add=True,
        help_text="When the website was first added"
    )
    last_scanned_at = models.DateTimeField(
        null=True,
        blank=True,
        help_text="When the website was last scanned"
    )

    class Meta:
        db_table = 'websites'
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['domain']),
            models.Index(fields=['-last_scanned_at']),
        ]

    def __str__(self):
        return self.url

    def save(self, *args, **kwargs):
        """Extract the domain from the URL before saving.

        Uses ``urlparse().hostname`` rather than ``netloc`` so the stored
        domain never includes a port number or userinfo component
        (e.g. 'https://user@example.com:8443/' yields 'example.com');
        ``hostname`` is already lowercased by urllib.
        """
        if self.url:
            from urllib.parse import urlparse
            parsed = urlparse(self.url)
            # hostname is None for URLs without a network location;
            # fall back to '' so the non-null column stays satisfied.
            self.domain = parsed.hostname or ''
        super().save(*args, **kwargs)
class ScanStatus(models.TextChoices):
    """Enumeration of possible scan statuses.

    The left-hand value is what gets persisted in ``Scan.status``; the
    right-hand label is the human-readable display name. Values are a
    database contract — do not rename or reorder without a migration plan.
    """
    PENDING = 'pending', 'Pending'
    RUNNING = 'running', 'Running'
    DONE = 'done', 'Completed'
    FAILED = 'failed', 'Failed'
    # NOTE(review): presumably means some scanners succeeded and some
    # failed — confirm against the task code that sets this status.
    PARTIAL = 'partial', 'Partially Completed'
class Scan(models.Model):
    """
    Represents a single scan of a website.

    Contains aggregated scores from various scanning tools and
    links to detailed issues (related name ``issues``) and metrics
    (related name ``metrics``).
    """

    id = models.UUIDField(
        primary_key=True,
        default=uuid.uuid4,
        editable=False,
        help_text="Unique identifier for the scan"
    )
    website = models.ForeignKey(
        Website,
        on_delete=models.CASCADE,
        related_name='scans',
        help_text="The website that was scanned"
    )
    status = models.CharField(
        max_length=20,
        choices=ScanStatus.choices,
        default=ScanStatus.PENDING,
        db_index=True,
        help_text="Current status of the scan"
    )

    # Celery task tracking
    celery_task_id = models.CharField(
        max_length=255,
        null=True,
        blank=True,
        help_text="Celery task ID for tracking"
    )

    # Timestamps
    created_at = models.DateTimeField(
        auto_now_add=True,
        help_text="When the scan was created"
    )
    started_at = models.DateTimeField(
        null=True,
        blank=True,
        help_text="When the scan started running"
    )
    completed_at = models.DateTimeField(
        null=True,
        blank=True,
        help_text="When the scan completed"
    )

    # Aggregated scores (0-100). NULL means the corresponding scanner
    # has not produced a result for this scan.
    performance_score = models.IntegerField(
        null=True,
        blank=True,
        help_text="Lighthouse performance score (0-100)"
    )
    accessibility_score = models.IntegerField(
        null=True,
        blank=True,
        help_text="Lighthouse accessibility score (0-100)"
    )
    seo_score = models.IntegerField(
        null=True,
        blank=True,
        help_text="Lighthouse SEO score (0-100)"
    )
    best_practices_score = models.IntegerField(
        null=True,
        blank=True,
        help_text="Lighthouse best practices score (0-100)"
    )
    security_score = models.IntegerField(
        null=True,
        blank=True,
        help_text="Computed security score based on issues (0-100)"
    )

    # Overall health score (computed weighted average; see
    # calculate_overall_score)
    overall_score = models.IntegerField(
        null=True,
        blank=True,
        help_text="Overall health score (0-100)"
    )

    # Error tracking
    error_message = models.TextField(
        null=True,
        blank=True,
        help_text="Error message if scan failed"
    )

    # Raw data from scanners, kept verbatim for debugging/re-analysis
    raw_lighthouse_data = models.JSONField(
        null=True,
        blank=True,
        help_text="Raw Lighthouse report data"
    )
    raw_zap_data = models.JSONField(
        null=True,
        blank=True,
        help_text="Raw OWASP ZAP report data"
    )
    raw_playwright_data = models.JSONField(
        null=True,
        blank=True,
        help_text="Raw Playwright analysis data"
    )
    raw_headers_data = models.JSONField(
        null=True,
        blank=True,
        help_text="Raw HTTP headers analysis data"
    )

    class Meta:
        db_table = 'scans'
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['status']),
            models.Index(fields=['-created_at']),
            models.Index(fields=['website', '-created_at']),
        ]

    def __str__(self):
        return f"Scan {self.id} - {self.website.url} ({self.status})"

    def calculate_overall_score(self):
        """
        Calculate overall health score as weighted average of all scores.

        Weights:
        - Performance: 25%
        - Security: 30%
        - Accessibility: 15%
        - SEO: 15%
        - Best Practices: 15%

        Missing (None) scores are excluded and the remaining weights are
        renormalized, so a scan with only some scanners finished still
        gets a meaningful average. Sets and returns ``self.overall_score``
        (None when no component score is available). Does not save.
        """
        scores = [
            (self.performance_score, 0.25),
            (self.security_score, 0.30),
            (self.accessibility_score, 0.15),
            (self.seo_score, 0.15),
            (self.best_practices_score, 0.15),
        ]

        total_weight = 0
        weighted_sum = 0

        for score, weight in scores:
            if score is not None:
                weighted_sum += score * weight
                total_weight += weight

        if total_weight > 0:
            self.overall_score = round(weighted_sum / total_weight)
        else:
            self.overall_score = None

        return self.overall_score

    def calculate_security_score(self):
        """
        Calculate security score based on security issues found.

        Starts at 100 and deducts points based on issue severity:
        - Critical: -25 points each
        - High: -15 points each
        - Medium: -8 points each
        - Low: -3 points each
        - Info: -1 point each

        The score is clamped at 0. Sets and returns
        ``self.security_score``. Does not save.
        """
        deductions = {
            'critical': 25,
            'high': 15,
            'medium': 8,
            'low': 3,
            'info': 1,
        }

        score = 100
        # Only the severity strings are needed here; values_list avoids
        # instantiating a full Issue object per row.
        severities = self.issues.filter(
            category__in=['security', 'headers', 'tls', 'cors']
        ).values_list('severity', flat=True)

        for severity in severities:
            # Unknown severities deduct nothing rather than raising.
            score -= deductions.get(severity, 0)

        self.security_score = max(0, score)
        return self.security_score
class IssueCategory(models.TextChoices):
    """Categories of issues that can be detected.

    Stored values are a database contract. Note that 'security',
    'headers', 'tls' and 'cors' are the categories counted by
    ``Scan.calculate_security_score``.
    """
    PERFORMANCE = 'performance', 'Performance'
    SECURITY = 'security', 'Security'
    HEADERS = 'headers', 'HTTP Headers'
    TLS = 'tls', 'TLS/SSL'
    CORS = 'cors', 'CORS'
    ACCESSIBILITY = 'accessibility', 'Accessibility'
    SEO = 'seo', 'SEO'
    BEST_PRACTICES = 'best_practices', 'Best Practices'
    CONTENT = 'content', 'Content'
    RESOURCES = 'resources', 'Resources'
class IssueSeverity(models.TextChoices):
    """Severity levels for issues, from most to least severe.

    These stored values are the keys of the deduction table in
    ``Scan.calculate_security_score`` (critical=25 ... info=1).
    """
    CRITICAL = 'critical', 'Critical'
    HIGH = 'high', 'High'
    MEDIUM = 'medium', 'Medium'
    LOW = 'low', 'Low'
    INFO = 'info', 'Informational'
class ScannerTool(models.TextChoices):
    """Scanner tools that can detect issues and produce metrics.

    Used by both ``Issue.tool`` and ``Metric.source``.
    """
    LIGHTHOUSE = 'lighthouse', 'Google Lighthouse'
    ZAP = 'owasp_zap', 'OWASP ZAP'
    PLAYWRIGHT = 'playwright', 'Playwright'
    HEADER_CHECK = 'header_check', 'HTTP Header Check'
    TLS_CHECK = 'tls_check', 'TLS/SSL Check'
class Issue(models.Model):
    """
    Represents a specific issue found during a scan.

    Issues are categorized by type, severity, and the tool that detected them.
    Each issue includes a description and suggested remediation.
    """

    id = models.UUIDField(
        primary_key=True,
        default=uuid.uuid4,
        editable=False
    )
    scan = models.ForeignKey(
        Scan,
        on_delete=models.CASCADE,
        related_name='issues',
        help_text="The scan that found this issue"
    )

    # Classification
    category = models.CharField(
        max_length=30,
        choices=IssueCategory.choices,
        db_index=True,
        help_text="Category of the issue"
    )
    severity = models.CharField(
        max_length=20,
        choices=IssueSeverity.choices,
        db_index=True,
        help_text="Severity level of the issue"
    )
    tool = models.CharField(
        max_length=30,
        choices=ScannerTool.choices,
        help_text="Tool that detected this issue"
    )

    # Issue details
    title = models.CharField(
        max_length=500,
        help_text="Brief title of the issue"
    )
    description = models.TextField(
        help_text="Detailed description of the issue"
    )
    affected_url = models.URLField(
        max_length=2048,
        null=True,
        blank=True,
        help_text="Specific URL affected by this issue"
    )
    remediation = models.TextField(
        null=True,
        blank=True,
        help_text="Suggested fix or remediation"
    )

    # Additional data from scanner
    raw_data = models.JSONField(
        null=True,
        blank=True,
        help_text="Raw data from the scanner for this issue"
    )

    # Timestamps
    created_at = models.DateTimeField(
        auto_now_add=True
    )

    class Meta:
        db_table = 'issues'
        # NOTE(review): ordering on the raw 'severity' string is
        # alphabetical (critical, high, info, low, medium), so 'info'
        # sorts ahead of 'low' and 'medium'. Confirm whether rank
        # ordering was intended; fixing it would need a queryset
        # annotation or a numeric rank column.
        ordering = ['severity', '-created_at']
        indexes = [
            models.Index(fields=['scan', 'category']),
            models.Index(fields=['scan', 'severity']),
            models.Index(fields=['tool']),
        ]

    def __str__(self):
        return f"[{self.severity}] {self.title}"
class MetricUnit(models.TextChoices):
    """Units of measurement for metrics.

    ``Metric.get_formatted_value`` special-cases MILLISECONDS, BYTES,
    PERCENT and SCORE; every other unit is rendered generically as
    "<value> <label>".
    """
    MILLISECONDS = 'ms', 'Milliseconds'
    SECONDS = 's', 'Seconds'
    BYTES = 'bytes', 'Bytes'
    KILOBYTES = 'kb', 'Kilobytes'
    MEGABYTES = 'mb', 'Megabytes'
    SCORE = 'score', 'Score (0-1)'
    PERCENT = 'percent', 'Percentage'
    COUNT = 'count', 'Count'
class Metric(models.Model):
    """
    A single numerical measurement recorded by a scan.

    Examples: page load time, total byte weight, request count. Each
    metric carries a machine name, a display name, a float value with a
    unit, and the tool that produced it. Metric names are unique within
    a scan (see the ``unique_metric_per_scan`` constraint).
    """

    id = models.UUIDField(
        primary_key=True,
        default=uuid.uuid4,
        editable=False
    )
    scan = models.ForeignKey(
        Scan,
        on_delete=models.CASCADE,
        related_name='metrics',
        help_text="The scan that measured this metric"
    )

    # Metric identification
    name = models.CharField(
        max_length=100,
        db_index=True,
        help_text="Name of the metric (e.g., 'first_contentful_paint_ms')"
    )
    display_name = models.CharField(
        max_length=200,
        help_text="Human-readable name for display"
    )

    # Value
    value = models.FloatField(
        help_text="Numeric value of the metric"
    )
    unit = models.CharField(
        max_length=20,
        choices=MetricUnit.choices,
        help_text="Unit of measurement"
    )

    # Source
    source = models.CharField(
        max_length=30,
        choices=ScannerTool.choices,
        help_text="Tool that provided this metric"
    )

    # Score (if applicable)
    score = models.FloatField(
        null=True,
        blank=True,
        help_text="Lighthouse score for this metric (0-1)"
    )

    # Timestamp
    created_at = models.DateTimeField(
        auto_now_add=True
    )

    class Meta:
        db_table = 'metrics'
        ordering = ['name']
        indexes = [
            models.Index(fields=['scan', 'name']),
            models.Index(fields=['source']),
        ]
        # Ensure unique metric names per scan
        constraints = [
            models.UniqueConstraint(
                fields=['scan', 'name'],
                name='unique_metric_per_scan'
            )
        ]

    def __str__(self):
        return f"{self.display_name}: {self.value} {self.unit}"

    def get_formatted_value(self):
        """Return a human-friendly string rendering of the value.

        Milliseconds >= 1000 are shown in seconds; byte counts are
        promoted to KB/MB at 1024 boundaries; percentages and Lighthouse
        scores get fixed precision; anything else falls back to
        "<value> <unit label>".
        """
        amount = self.value

        if self.unit == MetricUnit.MILLISECONDS:
            if amount >= 1000:
                return f"{amount / 1000:.2f}s"
            return f"{amount:.0f}ms"

        if self.unit == MetricUnit.BYTES:
            kib = 1024
            mib = 1024 * 1024
            if amount >= mib:
                return f"{amount / (1024 * 1024):.2f} MB"
            if amount >= kib:
                return f"{amount / 1024:.1f} KB"
            return f"{amount:.0f} bytes"

        if self.unit == MetricUnit.PERCENT:
            return f"{amount:.1f}%"

        if self.unit == MetricUnit.SCORE:
            return f"{amount:.3f}"

        return f"{amount:.2f} {self.get_unit_display()}"