""" Database models for Website Analyzer. This module defines the core data models for storing websites, scans, issues, and metrics from various scanning tools. """ import uuid from django.db import models from django.utils import timezone from django.core.validators import URLValidator class Website(models.Model): """ Represents a website that has been scanned. Each unique URL gets one Website record, which can have multiple Scan records associated with it. """ id = models.UUIDField( primary_key=True, default=uuid.uuid4, editable=False, help_text="Unique identifier for the website" ) url = models.URLField( max_length=2048, unique=True, validators=[URLValidator(schemes=['http', 'https'])], help_text="The normalized URL of the website" ) domain = models.CharField( max_length=255, db_index=True, help_text="The domain extracted from the URL" ) created_at = models.DateTimeField( auto_now_add=True, help_text="When the website was first added" ) last_scanned_at = models.DateTimeField( null=True, blank=True, help_text="When the website was last scanned" ) class Meta: db_table = 'websites' ordering = ['-created_at'] indexes = [ models.Index(fields=['domain']), models.Index(fields=['-last_scanned_at']), ] def __str__(self): return self.url def save(self, *args, **kwargs): """Extract domain from URL before saving.""" if self.url: from urllib.parse import urlparse parsed = urlparse(self.url) self.domain = parsed.netloc.lower() super().save(*args, **kwargs) class ScanStatus(models.TextChoices): """Enumeration of possible scan statuses.""" PENDING = 'pending', 'Pending' RUNNING = 'running', 'Running' DONE = 'done', 'Completed' FAILED = 'failed', 'Failed' PARTIAL = 'partial', 'Partially Completed' class Scan(models.Model): """ Represents a single scan of a website. Contains aggregated scores from various scanning tools and links to detailed issues and metrics. """ id = models.UUIDField( primary_key=True, default=uuid.uuid4, editable=False, help_text="Unique identifier for the scan" ) website = models.ForeignKey( Website, on_delete=models.CASCADE, related_name='scans', help_text="The website that was scanned" ) status = models.CharField( max_length=20, choices=ScanStatus.choices, default=ScanStatus.PENDING, db_index=True, help_text="Current status of the scan" ) # Celery task tracking celery_task_id = models.CharField( max_length=255, null=True, blank=True, help_text="Celery task ID for tracking" ) # Timestamps created_at = models.DateTimeField( auto_now_add=True, help_text="When the scan was created" ) started_at = models.DateTimeField( null=True, blank=True, help_text="When the scan started running" ) completed_at = models.DateTimeField( null=True, blank=True, help_text="When the scan completed" ) # Aggregated scores (0-100) performance_score = models.IntegerField( null=True, blank=True, help_text="Lighthouse performance score (0-100)" ) accessibility_score = models.IntegerField( null=True, blank=True, help_text="Lighthouse accessibility score (0-100)" ) seo_score = models.IntegerField( null=True, blank=True, help_text="Lighthouse SEO score (0-100)" ) best_practices_score = models.IntegerField( null=True, blank=True, help_text="Lighthouse best practices score (0-100)" ) security_score = models.IntegerField( null=True, blank=True, help_text="Computed security score based on issues (0-100)" ) # Overall health score (computed average) overall_score = models.IntegerField( null=True, blank=True, help_text="Overall health score (0-100)" ) # Error tracking error_message = models.TextField( null=True, blank=True, help_text="Error message if scan failed" ) # Raw data from scanners raw_lighthouse_data = models.JSONField( null=True, blank=True, help_text="Raw Lighthouse report data" ) raw_zap_data = models.JSONField( null=True, blank=True, help_text="Raw OWASP ZAP report data" ) raw_playwright_data = models.JSONField( null=True, blank=True, help_text="Raw Playwright analysis data" ) raw_headers_data = models.JSONField( null=True, blank=True, help_text="Raw HTTP headers analysis data" ) class Meta: db_table = 'scans' ordering = ['-created_at'] indexes = [ models.Index(fields=['status']), models.Index(fields=['-created_at']), models.Index(fields=['website', '-created_at']), ] def __str__(self): return f"Scan {self.id} - {self.website.url} ({self.status})" def calculate_overall_score(self): """ Calculate overall health score as weighted average of all scores. Weights: - Performance: 25% - Security: 30% - Accessibility: 15% - SEO: 15% - Best Practices: 15% """ scores = [ (self.performance_score, 0.25), (self.security_score, 0.30), (self.accessibility_score, 0.15), (self.seo_score, 0.15), (self.best_practices_score, 0.15), ] total_weight = 0 weighted_sum = 0 for score, weight in scores: if score is not None: weighted_sum += score * weight total_weight += weight if total_weight > 0: self.overall_score = round(weighted_sum / total_weight) else: self.overall_score = None return self.overall_score def calculate_security_score(self): """ Calculate security score based on security issues found. Starts at 100 and deducts points based on issue severity: - Critical: -25 points each - High: -15 points each - Medium: -8 points each - Low: -3 points each - Info: -1 point each """ deductions = { 'critical': 25, 'high': 15, 'medium': 8, 'low': 3, 'info': 1, } score = 100 security_issues = self.issues.filter( category__in=['security', 'headers', 'tls', 'cors'] ) for issue in security_issues: score -= deductions.get(issue.severity, 0) self.security_score = max(0, score) return self.security_score class IssueCategory(models.TextChoices): """Categories of issues that can be detected.""" PERFORMANCE = 'performance', 'Performance' SECURITY = 'security', 'Security' HEADERS = 'headers', 'HTTP Headers' TLS = 'tls', 'TLS/SSL' CORS = 'cors', 'CORS' ACCESSIBILITY = 'accessibility', 'Accessibility' SEO = 'seo', 'SEO' BEST_PRACTICES = 'best_practices', 'Best Practices' CONTENT = 'content', 'Content' RESOURCES = 'resources', 'Resources' class IssueSeverity(models.TextChoices): """Severity levels for issues.""" CRITICAL = 'critical', 'Critical' HIGH = 'high', 'High' MEDIUM = 'medium', 'Medium' LOW = 'low', 'Low' INFO = 'info', 'Informational' class ScannerTool(models.TextChoices): """Scanner tools that can detect issues.""" LIGHTHOUSE = 'lighthouse', 'Google Lighthouse' ZAP = 'owasp_zap', 'OWASP ZAP' PLAYWRIGHT = 'playwright', 'Playwright' HEADER_CHECK = 'header_check', 'HTTP Header Check' TLS_CHECK = 'tls_check', 'TLS/SSL Check' class Issue(models.Model): """ Represents a specific issue found during a scan. Issues are categorized by type, severity, and the tool that detected them. Each issue includes a description and suggested remediation. """ id = models.UUIDField( primary_key=True, default=uuid.uuid4, editable=False ) scan = models.ForeignKey( Scan, on_delete=models.CASCADE, related_name='issues', help_text="The scan that found this issue" ) # Classification category = models.CharField( max_length=30, choices=IssueCategory.choices, db_index=True, help_text="Category of the issue" ) severity = models.CharField( max_length=20, choices=IssueSeverity.choices, db_index=True, help_text="Severity level of the issue" ) tool = models.CharField( max_length=30, choices=ScannerTool.choices, help_text="Tool that detected this issue" ) # Issue details title = models.CharField( max_length=500, help_text="Brief title of the issue" ) description = models.TextField( help_text="Detailed description of the issue" ) affected_url = models.URLField( max_length=2048, null=True, blank=True, help_text="Specific URL affected by this issue" ) remediation = models.TextField( null=True, blank=True, help_text="Suggested fix or remediation" ) # Additional data from scanner raw_data = models.JSONField( null=True, blank=True, help_text="Raw data from the scanner for this issue" ) # Timestamps created_at = models.DateTimeField( auto_now_add=True ) class Meta: db_table = 'issues' ordering = ['severity', '-created_at'] indexes = [ models.Index(fields=['scan', 'category']), models.Index(fields=['scan', 'severity']), models.Index(fields=['tool']), ] def __str__(self): return f"[{self.severity}] {self.title}" class MetricUnit(models.TextChoices): """Units of measurement for metrics.""" MILLISECONDS = 'ms', 'Milliseconds' SECONDS = 's', 'Seconds' BYTES = 'bytes', 'Bytes' KILOBYTES = 'kb', 'Kilobytes' MEGABYTES = 'mb', 'Megabytes' SCORE = 'score', 'Score (0-1)' PERCENT = 'percent', 'Percentage' COUNT = 'count', 'Count' class Metric(models.Model): """ Represents a specific metric measured during a scan. Metrics are numerical values with units, such as page load time, total byte weight, number of requests, etc. """ id = models.UUIDField( primary_key=True, default=uuid.uuid4, editable=False ) scan = models.ForeignKey( Scan, on_delete=models.CASCADE, related_name='metrics', help_text="The scan that measured this metric" ) # Metric identification name = models.CharField( max_length=100, db_index=True, help_text="Name of the metric (e.g., 'first_contentful_paint_ms')" ) display_name = models.CharField( max_length=200, help_text="Human-readable name for display" ) # Value value = models.FloatField( help_text="Numeric value of the metric" ) unit = models.CharField( max_length=20, choices=MetricUnit.choices, help_text="Unit of measurement" ) # Source source = models.CharField( max_length=30, choices=ScannerTool.choices, help_text="Tool that provided this metric" ) # Score (if applicable) score = models.FloatField( null=True, blank=True, help_text="Lighthouse score for this metric (0-1)" ) # Timestamp created_at = models.DateTimeField( auto_now_add=True ) class Meta: db_table = 'metrics' ordering = ['name'] indexes = [ models.Index(fields=['scan', 'name']), models.Index(fields=['source']), ] # Ensure unique metric names per scan constraints = [ models.UniqueConstraint( fields=['scan', 'name'], name='unique_metric_per_scan' ) ] def __str__(self): return f"{self.display_name}: {self.value} {self.unit}" def get_formatted_value(self): """Return a formatted string representation of the value.""" if self.unit == MetricUnit.MILLISECONDS: if self.value >= 1000: return f"{self.value / 1000:.2f}s" return f"{self.value:.0f}ms" elif self.unit == MetricUnit.BYTES: if self.value >= 1024 * 1024: return f"{self.value / (1024 * 1024):.2f} MB" elif self.value >= 1024: return f"{self.value / 1024:.1f} KB" return f"{self.value:.0f} bytes" elif self.unit == MetricUnit.PERCENT: return f"{self.value:.1f}%" elif self.unit == MetricUnit.SCORE: return f"{self.value:.3f}" else: return f"{self.value:.2f} {self.get_unit_display()}"