From f92406356f68ebd02ef66d025fd69827f8bf3c7d Mon Sep 17 00:00:00 2001 From: Sereth1 Date: Mon, 8 Dec 2025 10:35:41 +0700 Subject: [PATCH] more --- .gitignore | 49 +++ README.md | 429 ++++++++++++++++++++ backend/core/settings.py | 69 ++-- backend/pytest.ini | 9 + backend/static/.gitkeep | 1 + backend/tests/__init__.py | 11 + backend/tests/test_scanner_parsing.py | 347 ++++++++++++++++ backend/websites/migrations/0001_initial.py | 112 +++++ backend/websites/migrations/__init__.py | 0 9 files changed, 1002 insertions(+), 25 deletions(-) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 backend/pytest.ini create mode 100644 backend/static/.gitkeep create mode 100644 backend/tests/__init__.py create mode 100644 backend/tests/test_scanner_parsing.py create mode 100644 backend/websites/migrations/0001_initial.py create mode 100644 backend/websites/migrations/__init__.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e036099 --- /dev/null +++ b/.gitignore @@ -0,0 +1,49 @@ +# Virtual environments +.venv/ +venv/ +env/ + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +*.egg +*.egg-info/ +dist/ +build/ + +# Django +*.log +local_settings.py +db.sqlite3 +media/ +staticfiles/ + +# Environment +.env +*.env.local + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Node +node_modules/ +npm-debug.log + +# Docker +*.log + +# Coverage +htmlcov/ +.coverage +.coverage.* diff --git a/README.md b/README.md new file mode 100644 index 0000000..63653be --- /dev/null +++ b/README.md @@ -0,0 +1,429 @@ +# Secure Web Analyzer + +A comprehensive web application security and performance analyzer built with Django, Celery, and modern scanning tools. + +## Features + +- **Performance Analysis**: Uses Google Lighthouse for Core Web Vitals and performance metrics +- **Security Scanning**: Integrates OWASP ZAP for vulnerability detection +- **Browser Analysis**: Playwright-based console error and network analysis +- **Header Security**: Checks HTTP security headers and TLS configuration +- **Async Processing**: Celery workers for background scan processing +- **REST API**: Full API access to all scanning functionality + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Frontend (Templates) │ +│ Tailwind CSS + Alpine.js + Chart.js │ +└────────────────────────────┬────────────────────────────────────┘ + │ +┌────────────────────────────▼────────────────────────────────────┐ +│ Django REST Framework │ +│ /api/scans, /api/websites │ +└────────────────────────────┬────────────────────────────────────┘ + │ + ┌──────────────────┼──────────────────┐ + │ │ │ +┌─────────▼─────────┐ ┌──────▼──────┐ ┌────────▼────────┐ +│ PostgreSQL DB │ │ Redis │ │ Celery Worker │ +│ Scans, Issues, │ │Message Queue│ │ Background │ +│ Metrics │ │ │ │ Processing │ +└───────────────────┘ └─────────────┘ └────────┬────────┘ + │ + ┌──────────────────────────────────────┼──────────────────────┐ + │ │ │ +┌───────▼───────┐ ┌─────────────────┐ ┌────────▼────────┐ ┌──────────▼──────────┐ +│ Lighthouse │ │ OWASP ZAP │ │ Playwright │ │ Headers Scanner │ +│ (Node.js) │ │ (Docker) │ │ (Python) │ │ (requests/ssl) │ +│ Port 3001 │ │ Port 8081 │ │ │ │ │ +└───────────────┘ └─────────────────┘ └─────────────────┘ └─────────────────────┘ +``` + +## Quick Start + +### Prerequisites + +- Docker & Docker Compose +- Git + +### 1. 
Clone and Configure
+
+```bash
+git clone <repository-url>
+cd secure-web
+
+# Copy environment file
+cp backend/.env.example backend/.env
+
+# Edit .env with your settings (optional for development)
+```
+
+### 2. Start the Stack
+
+```bash
+# Build and start all services
+docker-compose up --build -d
+
+# View logs
+docker-compose logs -f
+
+# Check service status
+docker-compose ps
+```
+
+### 3. Initialize Database
+
+```bash
+# Run migrations
+docker-compose exec web python manage.py migrate
+
+# Create superuser (optional)
+docker-compose exec web python manage.py createsuperuser
+```
+
+### 4. Access the Application
+
+- **Web Interface**: http://localhost:8000
+- **Admin Panel**: http://localhost:8000/admin
+- **API Documentation**: http://localhost:8000/api/
+
+## Running a Scan
+
+### Via Web Interface
+
+1. Navigate to http://localhost:8000
+2. Enter a URL in the input field (e.g., `https://example.com`)
+3. Click "Scan Website"
+4. Wait for the scan to complete (typically 1-3 minutes)
+5. View results including scores, metrics, and issues
+
+### Via API
+
+```bash
+# Create a new scan
+curl -X POST http://localhost:8000/api/scans/ \
+  -H "Content-Type: application/json" \
+  -d '{"url": "https://example.com"}'
+
+# Response:
+# {
+#   "id": "uuid-here",
+#   "url": "https://example.com",
+#   "status": "pending",
+#   ...
+# }
+
+# Check scan status
+curl http://localhost:8000/api/scans/{scan-id}/
+
+# List all scans
+curl http://localhost:8000/api/scans/
+
+# Get issues for a scan
+curl "http://localhost:8000/api/issues/?scan={scan-id}"
+```
+
+## API Endpoints
+
+| Method | Endpoint | Description |
+|--------|----------|-------------|
+| GET | `/api/scans/` | List all scans |
+| POST | `/api/scans/` | Create new scan |
+| GET | `/api/scans/{id}/` | Get scan details |
+| GET | `/api/websites/` | List all websites |
+| GET | `/api/issues/` | List all issues |
+| GET | `/api/issues/?scan={id}` | Issues for specific scan |
+| GET | `/api/issues/?severity=high` | Filter by severity |
+
+## Scanner Integration
+
+### Lighthouse (Performance)
+
+The Lighthouse scanner runs as a separate Node.js service and provides:
+- **Performance Score**: Overall performance rating
+- **Core Web Vitals**: FCP, LCP, CLS, TTI, TBT
+- **Resource Analysis**: Unused JS, render-blocking resources
+- **Best Practices**: Modern web development compliance
+
+```python
+# Internal service call
+POST http://lighthouse:3001/scan
+{
+  "url": "https://example.com",
+  "options": {
+    "preset": "desktop"
+  }
+}
+```
+
+### OWASP ZAP (Security)
+
+ZAP performs active security scanning:
+- **Spider Crawling**: Discovers URLs and entry points
+- **Passive Scanning**: Analyzes responses for vulnerabilities
+- **Alert Detection**: XSS, injection, misconfigurations
+
+```python
+# ZAP API endpoints used
+GET http://zap:8081/JSON/spider/action/scan/
+GET http://zap:8081/JSON/pscan/view/recordsToScan/
+GET http://zap:8081/JSON/core/view/alerts/
+```
+
+### Playwright (Browser Analysis)
+
+Playwright performs real browser analysis (a sketch follows the list):
+- **Console Errors**: JavaScript errors and warnings
+- **Network Metrics**: Response times, failed requests
+- **Memory Metrics**: JS heap size monitoring
+- **Resource Loading**: Images, scripts, stylesheets
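+
+As a rough illustration of this approach (a standalone sketch under assumed names, not the project's actual `scanner/playwright_scanner.py` code):
+
+```python
+from playwright.sync_api import sync_playwright
+
+console_errors = []   # JavaScript errors emitted on the page
+failed_requests = []  # Requests that never completed
+
+with sync_playwright() as p:
+    browser = p.chromium.launch()
+    page = browser.new_page(viewport={"width": 1920, "height": 1080})
+    # Record console errors and failed network requests as they happen
+    page.on("console", lambda msg: console_errors.append(msg.text) if msg.type == "error" else None)
+    page.on("requestfailed", lambda req: failed_requests.append(req.url))
+    page.goto("https://example.com", timeout=60_000)  # timeout in ms
+    browser.close()
+
+print(f"{len(console_errors)} console errors, {len(failed_requests)} failed requests")
+```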
+
+### Headers Scanner (HTTP Security)
+
+Checks security headers and TLS configuration (a sketch follows the list):
+- **Security Headers**: CSP, HSTS, X-Frame-Options, etc.
+- **Cookie Security**: Secure, HttpOnly, SameSite flags
+- **TLS Certificate**: Validity, expiration, issuer
+- **Information Disclosure**: Server version headers
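+
+A minimal sketch of the header check (standalone and illustrative; the project's actual logic lives in `scanner/headers_scanner.py`):
+
+```python
+import requests
+
+EXPECTED_HEADERS = [
+    "strict-transport-security", "content-security-policy", "x-frame-options",
+    "x-content-type-options", "referrer-policy", "permissions-policy",
+]
+
+def check_headers(url: str, timeout: int = 30) -> dict:
+    """Report missing security headers and version-disclosing headers."""
+    present = {k.lower() for k in requests.get(url, timeout=timeout).headers}
+    return {
+        "missing": [h for h in EXPECTED_HEADERS if h not in present],
+        "disclosed": [h for h in ("server", "x-powered-by") if h in present],
+    }
+
+print(check_headers("https://example.com"))
+```
+
+The unit tests in `backend/tests/test_scanner_parsing.py` exercise the same present/missing logic against canned header fixtures.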
+
+## Configuration
+
+### Environment Variables
+
+```bash
+# Django
+SECRET_KEY=your-secret-key
+DEBUG=True
+ALLOWED_HOSTS=localhost,127.0.0.1
+
+# Database
+DATABASE_URL=postgres://user:pass@db:5432/secure_web
+
+# Redis
+REDIS_URL=redis://redis:6379/0
+CELERY_BROKER_URL=redis://redis:6379/0
+
+# Scanner Services
+LIGHTHOUSE_URL=http://lighthouse:3001
+ZAP_API_URL=http://zap:8081
+ZAP_API_KEY=changeme
+
+# Scanner Timeouts
+LIGHTHOUSE_TIMEOUT=120
+ZAP_TIMEOUT=300
+PLAYWRIGHT_TIMEOUT=60
+```
+
+### Scanner Configuration
+
+Modify `backend/core/settings.py`:
+
+```python
+SCANNER_CONFIG = {
+    'lighthouse': {
+        'url': os.getenv('LIGHTHOUSE_URL', 'http://lighthouse:3001'),
+        'timeout': int(os.getenv('LIGHTHOUSE_TIMEOUT', '120')),
+        'preset': 'desktop',  # or 'mobile'
+    },
+    'zap': {
+        'url': os.getenv('ZAP_API_URL', 'http://zap:8081'),
+        'api_key': os.getenv('ZAP_API_KEY', 'changeme'),
+        'timeout': int(os.getenv('ZAP_TIMEOUT', '300')),
+        'spider_max_depth': 3,
+    },
+    'playwright': {
+        'timeout': int(os.getenv('PLAYWRIGHT_TIMEOUT', '60')),
+        'viewport': {'width': 1920, 'height': 1080},
+    },
+    'headers': {
+        'timeout': 30,
+        'verify_ssl': True,
+    },
+}
+```
+
+## Development
+
+### Running Locally (without Docker)
+
+```bash
+# Backend setup
+cd backend
+python -m venv venv
+source venv/bin/activate
+pip install -r requirements.txt
+
+# Set environment
+export DATABASE_URL=postgres://user:pass@localhost:5432/secure_web
+export REDIS_URL=redis://localhost:6379/0
+
+# Run migrations
+python manage.py migrate
+
+# Start Django
+python manage.py runserver
+
+# Start Celery (separate terminal)
+celery -A core worker -l INFO
+
+# Start Celery Beat (separate terminal)
+celery -A core beat -l INFO
+```
+
+### Running Tests
+
+```bash
+# Run all tests
+docker-compose exec web pytest
+
+# Run specific test file
+docker-compose exec web pytest tests/test_validators.py -v
+
+# Run with coverage
+docker-compose exec web pytest --cov=. --cov-report=html
+
+# Local testing
+cd backend
+pytest tests/ -v
+```
+
+### Code Structure
+
+```
+secure-web/
+├── backend/
+│   ├── core/                    # Django project settings
+│   │   ├── settings.py
+│   │   ├── urls.py
+│   │   ├── celery.py
+│   │   └── wsgi.py
+│   ├── websites/                # Main app - models
+│   │   ├── models.py            # Website, Scan, Issue, Metric
+│   │   └── admin.py
+│   ├── api/                     # DRF API
+│   │   ├── views.py
+│   │   ├── serializers.py
+│   │   └── urls.py
+│   ├── scanner/                 # Scanner modules
+│   │   ├── base.py              # BaseScanner ABC
+│   │   ├── validators.py        # URL validation, SSRF protection
+│   │   ├── headers_scanner.py
+│   │   ├── lighthouse_scanner.py
+│   │   ├── playwright_scanner.py
+│   │   ├── zap_scanner.py
+│   │   ├── runner.py            # Orchestrator
+│   │   └── tasks.py             # Celery tasks
+│   ├── templates/               # Frontend templates
+│   │   ├── base.html
+│   │   ├── index.html
+│   │   └── scan_detail.html
+│   └── tests/                   # Unit tests
+│       ├── test_validators.py
+│       ├── test_scans.py
+│       └── test_scanner_parsing.py
+├── lighthouse/                  # Lighthouse Node.js service
+│   ├── server.js
+│   ├── package.json
+│   └── Dockerfile
+└── docker-compose.yml
+```
+
+## Issue Categories
+
+| Category | Source | Description |
+|----------|--------|-------------|
+| `performance` | Lighthouse | Speed, loading, rendering issues |
+| `security` | ZAP, Headers | Vulnerabilities, misconfigurations |
+| `accessibility` | Lighthouse | WCAG compliance issues |
+| `seo` | Lighthouse | Search optimization issues |
+| `best_practices` | Lighthouse | Modern web standards |
+| `console_errors` | Playwright | JavaScript runtime errors |
+| `network` | Playwright | Failed requests, slow responses |
+| `headers` | Headers | Missing security headers |
+| `tls` | Headers | Certificate issues |
+| `cookies` | Headers | Insecure cookie settings |
+
+## Issue Severities
+
+| Level | Color | Description |
+|-------|-------|-------------|
+| `critical` | Red | Immediate action required |
+| `high` | Orange | Significant security/performance risk |
+| `medium` | Yellow | Should be addressed |
+| `low` | Blue | Minor improvement |
+| `info` | Gray | Informational only |
+
+## Troubleshooting
+
+### Common Issues
+
+**Services not starting:**
+```bash
+# Check logs
+docker-compose logs web
+docker-compose logs celery_worker
+docker-compose logs lighthouse
+docker-compose logs zap
+
+# Restart services
+docker-compose restart
+```
+
+**Database connection errors:**
+```bash
+# Wait for DB to be ready
+docker-compose exec web python manage.py wait_for_db
+
+# Check DB status
+docker-compose exec db psql -U secure_web -c "\l"
+```
+
+**ZAP not responding:**
+```bash
+# ZAP takes time to start, wait 30-60 seconds
+docker-compose logs zap
+
+# Check ZAP status
+curl http://localhost:8081/JSON/core/view/version/
+```
+
+**Scan stuck in pending:**
+```bash
+# Check Celery worker
+docker-compose logs celery_worker
+
+# Restart worker
+docker-compose restart celery_worker
+```
+
+### Performance Tips
+
+- For production, use a dedicated ZAP instance
+- Consider caching Lighthouse results for repeated scans
+- Adjust timeouts based on target website complexity
+- Use Redis persistence for task queue durability
+
+## Security Considerations
+
+- URL validation includes SSRF protection (blocks private IPs; see the sketch below)
+- ZAP API key should be changed in production
+- Consider rate limiting scan endpoints
+- Validate and sanitize all user inputs
+- Run containers with minimal privileges
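+
+The private-IP guard in `scanner/validators.py` is not reproduced here, but a minimal sketch of the idea (hypothetical standalone code) could look like:
+
+```python
+import ipaddress
+import socket
+from urllib.parse import urlparse
+
+def is_safe_url(url: str) -> bool:
+    """Reject URLs whose host resolves to a private or loopback address."""
+    parsed = urlparse(url)
+    if parsed.scheme not in ("http", "https") or not parsed.hostname:
+        return False
+    try:
+        resolved = {info[4][0] for info in socket.getaddrinfo(parsed.hostname, None)}
+    except socket.gaierror:
+        return False  # Unresolvable hosts are rejected outright
+    return not any(
+        ipaddress.ip_address(addr).is_private or ipaddress.ip_address(addr).is_loopback
+        for addr in resolved
+    )
+
+assert is_safe_url("http://127.0.0.1:8000/") is False
+```
+
+Note that a resolve-then-fetch check is still exposed to DNS rebinding; pinning the resolved address for the actual request is a common hardening step.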
+
+## License
+
+MIT License - See LICENSE file for details.
+
+## Contributing
+
+1. Fork the repository
+2. Create a feature branch
+3. Write tests for new functionality
+4. Submit a pull request
+
+## Support
+
+For issues and feature requests, please use the GitHub issue tracker.
diff --git a/backend/core/settings.py b/backend/core/settings.py
index 209c956..a076017 100644
--- a/backend/core/settings.py
+++ b/backend/core/settings.py
@@ -79,28 +79,37 @@ WSGI_APPLICATION = 'core.wsgi.application'
 
 
 # Database
-# Parse DATABASE_URL or use default PostgreSQL settings
+# Parse DATABASE_URL or use SQLite for local development
 
-DATABASE_URL = os.getenv('DATABASE_URL', 'postgres://analyzer:analyzer_password@localhost:5432/website_analyzer')
+DATABASE_URL = os.getenv('DATABASE_URL', '')
 
-# Parse the DATABASE_URL
-import re
-db_pattern = r'postgres://(?P<user>[^:]+):(?P<password>[^@]+)@(?P<host>[^:]+):(?P<port>\d+)/(?P<name>.+)'
-db_match = re.match(db_pattern, DATABASE_URL)
-
-if db_match:
-    DATABASES = {
-        'default': {
-            'ENGINE': 'django.db.backends.postgresql',
-            'NAME': db_match.group('name'),
-            'USER': db_match.group('user'),
-            'PASSWORD': db_match.group('password'),
-            'HOST': db_match.group('host'),
-            'PORT': db_match.group('port'),
+if DATABASE_URL:
+    # Parse the DATABASE_URL for PostgreSQL
+    import re
+    db_pattern = r'postgres://(?P<user>[^:]+):(?P<password>[^@]+)@(?P<host>[^:]+):(?P<port>\d+)/(?P<name>.+)'
+    db_match = re.match(db_pattern, DATABASE_URL)
+
+    if db_match:
+        DATABASES = {
+            'default': {
+                'ENGINE': 'django.db.backends.postgresql',
+                'NAME': db_match.group('name'),
+                'USER': db_match.group('user'),
+                'PASSWORD': db_match.group('password'),
+                'HOST': db_match.group('host'),
+                'PORT': db_match.group('port'),
+            }
+        }
+    else:
+        # Invalid DATABASE_URL format, fallback to SQLite
+        DATABASES = {
+            'default': {
+                'ENGINE': 'django.db.backends.sqlite3',
+                'NAME': BASE_DIR / 'db.sqlite3',
+            }
         }
-    }
 else:
-    # Fallback for development
+    # No DATABASE_URL set - use SQLite for local development
     DATABASES = {
         'default': {
             'ENGINE': 'django.db.backends.sqlite3',
@@ -191,15 +200,25 @@ CELERY_TASK_SOFT_TIME_LIMIT = CELERY_TASK_TIME_LIMIT - 30
 
 
 # =============================================================================
-# Redis Cache Configuration
+# Cache Configuration
 # =============================================================================
 
-REDIS_URL = os.getenv('REDIS_URL', 'redis://localhost:6379/0')
-CACHES = {
-    'default': {
-        'BACKEND': 'django.core.cache.backends.redis.RedisCache',
-        'LOCATION': REDIS_URL,
+REDIS_URL = os.getenv('REDIS_URL', '')
+
+if REDIS_URL:
+    CACHES = {
+        'default': {
+            'BACKEND': 'django.core.cache.backends.redis.RedisCache',
+            'LOCATION': REDIS_URL,
+        }
+    }
+else:
+    # Use local memory cache for development without Redis
+    CACHES = {
+        'default': {
+            'BACKEND': 'django.core.cache.backends.locmem.LocMemCache',
+            'LOCATION': 'unique-snowflake',
+        }
     }
-}
 
 # =============================================================================
diff --git a/backend/pytest.ini b/backend/pytest.ini
new file mode 100644
index 0000000..01eb6b5
--- /dev/null
+++ b/backend/pytest.ini
@@ -0,0 +1,9 @@
+# Pytest configuration
+
+[pytest]
+DJANGO_SETTINGS_MODULE = core.settings
+python_files = test_*.py
+python_classes = Test*
+python_functions = test_*
+addopts = -v --tb=short
+testpaths = tests
diff --git a/backend/static/.gitkeep b/backend/static/.gitkeep
new file mode 100644
index 0000000..e166442
--- /dev/null
+++ b/backend/static/.gitkeep
@@ -0,0 +1 @@
+.gitkeep
diff --git a/backend/tests/__init__.py b/backend/tests/__init__.py
new file mode 100644
index 0000000..c4e657f
--- /dev/null
+++ b/backend/tests/__init__.py
@@ -0,0 +1,11 @@
+"""
+Tests configuration and fixtures.
+""" +import os +import sys + +# Add backend to Python path +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +# Set Django settings module +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings') diff --git a/backend/tests/test_scanner_parsing.py b/backend/tests/test_scanner_parsing.py new file mode 100644 index 0000000..fbbf020 --- /dev/null +++ b/backend/tests/test_scanner_parsing.py @@ -0,0 +1,347 @@ +""" +Tests for scanner result parsing. +""" + +import pytest +import json +from pathlib import Path + + +# Sample Lighthouse JSON response +SAMPLE_LIGHTHOUSE_RESPONSE = { + "scanId": "test-123", + "url": "https://example.com", + "scores": { + "performance": 85, + "accessibility": 90, + "bestPractices": 75, + "seo": 80 + }, + "metrics": { + "firstContentfulPaint": {"value": 1200, "unit": "ms", "score": 0.9}, + "largestContentfulPaint": {"value": 2500, "unit": "ms", "score": 0.75}, + "speedIndex": {"value": 3400, "unit": "ms", "score": 0.7}, + "timeToInteractive": {"value": 4500, "unit": "ms", "score": 0.65}, + "totalBlockingTime": {"value": 150, "unit": "ms", "score": 0.85}, + "cumulativeLayoutShift": {"value": 0.1, "unit": "score", "score": 0.95} + }, + "resources": { + "totalByteWeight": 2500000, + "unusedJavascript": [ + {"url": "https://example.com/bundle.js", "wastedBytes": 150000} + ], + "renderBlockingResources": [ + {"url": "https://example.com/styles.css", "wastedMs": 500} + ] + }, + "diagnostics": { + "numRequests": 45, + "numScripts": 12, + "numStylesheets": 3, + "numImages": 20 + }, + "issues": [ + { + "id": "uses-long-cache-ttl", + "category": "performance", + "title": "Serve static assets with an efficient cache policy", + "description": "A long cache lifetime can speed up repeat visits.", + "score": 0.3, + "impact": 5 + } + ] +} + + +class TestLighthouseResultParsing: + """Tests for parsing Lighthouse scanner results.""" + + def test_parse_scores(self): + """Test extracting scores from Lighthouse response.""" + scores = SAMPLE_LIGHTHOUSE_RESPONSE['scores'] + + assert scores['performance'] == 85 + assert scores['accessibility'] == 90 + assert scores['bestPractices'] == 75 + assert scores['seo'] == 80 + + def test_parse_core_web_vitals(self): + """Test extracting Core Web Vitals metrics.""" + metrics = SAMPLE_LIGHTHOUSE_RESPONSE['metrics'] + + # FCP + assert metrics['firstContentfulPaint']['value'] == 1200 + assert metrics['firstContentfulPaint']['unit'] == 'ms' + + # LCP + assert metrics['largestContentfulPaint']['value'] == 2500 + + # CLS + assert metrics['cumulativeLayoutShift']['value'] == 0.1 + assert metrics['cumulativeLayoutShift']['unit'] == 'score' + + def test_parse_resource_metrics(self): + """Test extracting resource metrics.""" + resources = SAMPLE_LIGHTHOUSE_RESPONSE['resources'] + diagnostics = SAMPLE_LIGHTHOUSE_RESPONSE['diagnostics'] + + assert resources['totalByteWeight'] == 2500000 + assert diagnostics['numRequests'] == 45 + assert diagnostics['numScripts'] == 12 + + def test_parse_issues(self): + """Test extracting issues from Lighthouse.""" + issues = SAMPLE_LIGHTHOUSE_RESPONSE['issues'] + + assert len(issues) == 1 + issue = issues[0] + assert issue['category'] == 'performance' + assert issue['title'] == 'Serve static assets with an efficient cache policy' + + +# Sample ZAP response +SAMPLE_ZAP_ALERTS = [ + { + "alert": "Cross-Site Scripting (Reflected)", + "risk": "3", # High + "confidence": "2", + "cweid": "79", + "wascid": "8", + "description": "Cross-site Scripting (XSS) is an attack technique...", + "url": 
"https://example.com/search?q=test", + "param": "q", + "evidence": "", + "solution": "Phase: Architecture and Design\nUse a vetted library...", + "reference": "https://owasp.org/www-community/attacks/xss/" + }, + { + "alert": "Missing Anti-clickjacking Header", + "risk": "2", # Medium + "confidence": "3", + "cweid": "1021", + "wascid": "15", + "description": "The response does not include X-Frame-Options...", + "url": "https://example.com/", + "solution": "Ensure X-Frame-Options HTTP header is included...", + "reference": "https://owasp.org/www-community/Security_Headers" + }, + { + "alert": "Server Leaks Information", + "risk": "1", # Low + "confidence": "3", + "cweid": "200", + "description": "The web/application server is leaking information...", + "url": "https://example.com/", + "evidence": "nginx/1.18.0", + "solution": "Configure the server to hide version information." + }, + { + "alert": "Information Disclosure", + "risk": "0", # Info + "confidence": "2", + "description": "This is an informational finding.", + "url": "https://example.com/" + } +] + + +class TestZAPResultParsing: + """Tests for parsing OWASP ZAP results.""" + + def test_parse_alert_severity(self): + """Test mapping ZAP risk levels to severity.""" + risk_mapping = { + '0': 'info', + '1': 'low', + '2': 'medium', + '3': 'high', + } + + for alert in SAMPLE_ZAP_ALERTS: + risk = alert['risk'] + expected_severity = risk_mapping[risk] + assert expected_severity in ['info', 'low', 'medium', 'high'] + + def test_parse_xss_alert(self): + """Test parsing XSS vulnerability alert.""" + xss_alert = SAMPLE_ZAP_ALERTS[0] + + assert xss_alert['alert'] == 'Cross-Site Scripting (Reflected)' + assert xss_alert['risk'] == '3' # High + assert xss_alert['cweid'] == '79' # XSS CWE ID + assert 'q' in xss_alert['param'] + + def test_parse_header_alert(self): + """Test parsing missing header alert.""" + header_alert = SAMPLE_ZAP_ALERTS[1] + + assert 'X-Frame-Options' in header_alert['alert'] + assert header_alert['risk'] == '2' # Medium + + def test_categorize_alerts(self): + """Test categorizing ZAP alerts.""" + def categorize(alert_name): + alert_lower = alert_name.lower() + if 'xss' in alert_lower or 'cross-site scripting' in alert_lower: + return 'security' + if 'header' in alert_lower or 'x-frame' in alert_lower: + return 'headers' + if 'cookie' in alert_lower: + return 'security' + return 'security' + + assert categorize(SAMPLE_ZAP_ALERTS[0]['alert']) == 'security' + assert categorize(SAMPLE_ZAP_ALERTS[1]['alert']) == 'headers' + + +# Sample HTTP headers response +SAMPLE_HEADERS = { + 'content-type': 'text/html; charset=utf-8', + 'server': 'nginx/1.18.0', + 'x-powered-by': 'Express', + 'strict-transport-security': 'max-age=31536000; includeSubDomains', + 'x-content-type-options': 'nosniff', + 'x-frame-options': 'SAMEORIGIN', + # Missing: Content-Security-Policy, Referrer-Policy, Permissions-Policy +} + + +class TestHeadersResultParsing: + """Tests for parsing HTTP headers analysis.""" + + REQUIRED_HEADERS = [ + 'strict-transport-security', + 'content-security-policy', + 'x-frame-options', + 'x-content-type-options', + 'referrer-policy', + 'permissions-policy', + ] + + def test_detect_present_headers(self): + """Test detecting which security headers are present.""" + headers_lower = {k.lower(): v for k, v in SAMPLE_HEADERS.items()} + + present = [h for h in self.REQUIRED_HEADERS if h in headers_lower] + + assert 'strict-transport-security' in present + assert 'x-frame-options' in present + assert 'x-content-type-options' in present + + 
def test_detect_missing_headers(self): + """Test detecting which security headers are missing.""" + headers_lower = {k.lower(): v for k, v in SAMPLE_HEADERS.items()} + + missing = [h for h in self.REQUIRED_HEADERS if h not in headers_lower] + + assert 'content-security-policy' in missing + assert 'referrer-policy' in missing + assert 'permissions-policy' in missing + + def test_detect_information_disclosure(self): + """Test detecting information disclosure headers.""" + info_disclosure_headers = ['server', 'x-powered-by', 'x-aspnet-version'] + + disclosed = [ + h for h in info_disclosure_headers + if h.lower() in {k.lower() for k in SAMPLE_HEADERS.keys()} + ] + + assert 'server' in disclosed + assert 'x-powered-by' in disclosed + + def test_check_hsts_max_age(self): + """Test checking HSTS max-age value.""" + hsts = SAMPLE_HEADERS.get('strict-transport-security', '') + + # Extract max-age + if 'max-age=' in hsts.lower(): + max_age_str = hsts.lower().split('max-age=')[1].split(';')[0] + max_age = int(max_age_str) + + # Should be at least 1 year (31536000 seconds) + assert max_age >= 31536000 + + +class TestScannerResultIntegration: + """Integration tests for combining scanner results.""" + + def test_aggregate_scores(self): + """Test aggregating scores from multiple scanners.""" + lighthouse_scores = SAMPLE_LIGHTHOUSE_RESPONSE['scores'] + + # Simulate security score from ZAP findings + security_score = 100 + for alert in SAMPLE_ZAP_ALERTS: + risk = alert['risk'] + if risk == '3': + security_score -= 15 # High + elif risk == '2': + security_score -= 8 # Medium + elif risk == '1': + security_score -= 3 # Low + else: + security_score -= 1 # Info + + security_score = max(0, security_score) + + # Calculate overall (simplified) + overall = ( + lighthouse_scores['performance'] * 0.25 + + security_score * 0.30 + + lighthouse_scores['accessibility'] * 0.15 + + lighthouse_scores['seo'] * 0.15 + + lighthouse_scores['bestPractices'] * 0.15 + ) + + assert 0 <= overall <= 100 + + def test_combine_issues(self): + """Test combining issues from multiple scanners.""" + # Lighthouse issues + lighthouse_issues = [ + { + 'category': 'performance', + 'severity': 'medium', + 'tool': 'lighthouse', + 'title': issue['title'] + } + for issue in SAMPLE_LIGHTHOUSE_RESPONSE['issues'] + ] + + # ZAP issues + risk_to_severity = {'0': 'info', '1': 'low', '2': 'medium', '3': 'high'} + zap_issues = [ + { + 'category': 'security', + 'severity': risk_to_severity[alert['risk']], + 'tool': 'owasp_zap', + 'title': alert['alert'] + } + for alert in SAMPLE_ZAP_ALERTS + ] + + # Header issues + headers_lower = {k.lower(): v for k, v in SAMPLE_HEADERS.items()} + header_issues = [ + { + 'category': 'headers', + 'severity': 'high' if h == 'content-security-policy' else 'medium', + 'tool': 'header_check', + 'title': f'Missing {h} header' + } + for h in ['content-security-policy', 'referrer-policy', 'permissions-policy'] + if h not in headers_lower + ] + + all_issues = lighthouse_issues + zap_issues + header_issues + + assert len(all_issues) > 0 + + # Count by severity + severity_counts = {} + for issue in all_issues: + severity = issue['severity'] + severity_counts[severity] = severity_counts.get(severity, 0) + 1 + + assert 'high' in severity_counts or 'medium' in severity_counts diff --git a/backend/websites/migrations/0001_initial.py b/backend/websites/migrations/0001_initial.py new file mode 100644 index 0000000..9d43b5f --- /dev/null +++ b/backend/websites/migrations/0001_initial.py @@ -0,0 +1,112 @@ +# Generated by Django 5.2.9 on 
2025-12-08 03:33 + +import django.core.validators +import django.db.models.deletion +import uuid +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='Website', + fields=[ + ('id', models.UUIDField(default=uuid.uuid4, editable=False, help_text='Unique identifier for the website', primary_key=True, serialize=False)), + ('url', models.URLField(help_text='The normalized URL of the website', max_length=2048, unique=True, validators=[django.core.validators.URLValidator(schemes=['http', 'https'])])), + ('domain', models.CharField(db_index=True, help_text='The domain extracted from the URL', max_length=255)), + ('created_at', models.DateTimeField(auto_now_add=True, help_text='When the website was first added')), + ('last_scanned_at', models.DateTimeField(blank=True, help_text='When the website was last scanned', null=True)), + ], + options={ + 'db_table': 'websites', + 'ordering': ['-created_at'], + 'indexes': [models.Index(fields=['domain'], name='websites_domain_9fabc6_idx'), models.Index(fields=['-last_scanned_at'], name='websites_last_sc_15be22_idx')], + }, + ), + migrations.CreateModel( + name='Scan', + fields=[ + ('id', models.UUIDField(default=uuid.uuid4, editable=False, help_text='Unique identifier for the scan', primary_key=True, serialize=False)), + ('status', models.CharField(choices=[('pending', 'Pending'), ('running', 'Running'), ('done', 'Completed'), ('failed', 'Failed'), ('partial', 'Partially Completed')], db_index=True, default='pending', help_text='Current status of the scan', max_length=20)), + ('celery_task_id', models.CharField(blank=True, help_text='Celery task ID for tracking', max_length=255, null=True)), + ('created_at', models.DateTimeField(auto_now_add=True, help_text='When the scan was created')), + ('started_at', models.DateTimeField(blank=True, help_text='When the scan started running', null=True)), + ('completed_at', models.DateTimeField(blank=True, help_text='When the scan completed', null=True)), + ('performance_score', models.IntegerField(blank=True, help_text='Lighthouse performance score (0-100)', null=True)), + ('accessibility_score', models.IntegerField(blank=True, help_text='Lighthouse accessibility score (0-100)', null=True)), + ('seo_score', models.IntegerField(blank=True, help_text='Lighthouse SEO score (0-100)', null=True)), + ('best_practices_score', models.IntegerField(blank=True, help_text='Lighthouse best practices score (0-100)', null=True)), + ('security_score', models.IntegerField(blank=True, help_text='Computed security score based on issues (0-100)', null=True)), + ('overall_score', models.IntegerField(blank=True, help_text='Overall health score (0-100)', null=True)), + ('error_message', models.TextField(blank=True, help_text='Error message if scan failed', null=True)), + ('raw_lighthouse_data', models.JSONField(blank=True, help_text='Raw Lighthouse report data', null=True)), + ('raw_zap_data', models.JSONField(blank=True, help_text='Raw OWASP ZAP report data', null=True)), + ('raw_playwright_data', models.JSONField(blank=True, help_text='Raw Playwright analysis data', null=True)), + ('raw_headers_data', models.JSONField(blank=True, help_text='Raw HTTP headers analysis data', null=True)), + ('website', models.ForeignKey(help_text='The website that was scanned', on_delete=django.db.models.deletion.CASCADE, related_name='scans', to='websites.website')), + ], + options={ + 'db_table': 'scans', + 'ordering': 
['-created_at'], + }, + ), + migrations.CreateModel( + name='Metric', + fields=[ + ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), + ('name', models.CharField(db_index=True, help_text="Name of the metric (e.g., 'first_contentful_paint_ms')", max_length=100)), + ('display_name', models.CharField(help_text='Human-readable name for display', max_length=200)), + ('value', models.FloatField(help_text='Numeric value of the metric')), + ('unit', models.CharField(choices=[('ms', 'Milliseconds'), ('s', 'Seconds'), ('bytes', 'Bytes'), ('kb', 'Kilobytes'), ('mb', 'Megabytes'), ('score', 'Score (0-1)'), ('percent', 'Percentage'), ('count', 'Count')], help_text='Unit of measurement', max_length=20)), + ('source', models.CharField(choices=[('lighthouse', 'Google Lighthouse'), ('owasp_zap', 'OWASP ZAP'), ('playwright', 'Playwright'), ('header_check', 'HTTP Header Check'), ('tls_check', 'TLS/SSL Check')], help_text='Tool that provided this metric', max_length=30)), + ('score', models.FloatField(blank=True, help_text='Lighthouse score for this metric (0-1)', null=True)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('scan', models.ForeignKey(help_text='The scan that measured this metric', on_delete=django.db.models.deletion.CASCADE, related_name='metrics', to='websites.scan')), + ], + options={ + 'db_table': 'metrics', + 'ordering': ['name'], + 'indexes': [models.Index(fields=['scan', 'name'], name='metrics_scan_id_c4cc62_idx'), models.Index(fields=['source'], name='metrics_source_71e403_idx')], + 'constraints': [models.UniqueConstraint(fields=('scan', 'name'), name='unique_metric_per_scan')], + }, + ), + migrations.CreateModel( + name='Issue', + fields=[ + ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), + ('category', models.CharField(choices=[('performance', 'Performance'), ('security', 'Security'), ('headers', 'HTTP Headers'), ('tls', 'TLS/SSL'), ('cors', 'CORS'), ('accessibility', 'Accessibility'), ('seo', 'SEO'), ('best_practices', 'Best Practices'), ('content', 'Content'), ('resources', 'Resources')], db_index=True, help_text='Category of the issue', max_length=30)), + ('severity', models.CharField(choices=[('critical', 'Critical'), ('high', 'High'), ('medium', 'Medium'), ('low', 'Low'), ('info', 'Informational')], db_index=True, help_text='Severity level of the issue', max_length=20)), + ('tool', models.CharField(choices=[('lighthouse', 'Google Lighthouse'), ('owasp_zap', 'OWASP ZAP'), ('playwright', 'Playwright'), ('header_check', 'HTTP Header Check'), ('tls_check', 'TLS/SSL Check')], help_text='Tool that detected this issue', max_length=30)), + ('title', models.CharField(help_text='Brief title of the issue', max_length=500)), + ('description', models.TextField(help_text='Detailed description of the issue')), + ('affected_url', models.URLField(blank=True, help_text='Specific URL affected by this issue', max_length=2048, null=True)), + ('remediation', models.TextField(blank=True, help_text='Suggested fix or remediation', null=True)), + ('raw_data', models.JSONField(blank=True, help_text='Raw data from the scanner for this issue', null=True)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('scan', models.ForeignKey(help_text='The scan that found this issue', on_delete=django.db.models.deletion.CASCADE, related_name='issues', to='websites.scan')), + ], + options={ + 'db_table': 'issues', + 'ordering': ['severity', '-created_at'], + 'indexes': [models.Index(fields=['scan', 
'category'], name='issues_scan_id_e7f389_idx'), models.Index(fields=['scan', 'severity'], name='issues_scan_id_c92ffd_idx'), models.Index(fields=['tool'], name='issues_tool_78d942_idx')], + }, + ), + migrations.AddIndex( + model_name='scan', + index=models.Index(fields=['status'], name='scans_status_dc5ad7_idx'), + ), + migrations.AddIndex( + model_name='scan', + index=models.Index(fields=['-created_at'], name='scans_created_7db2e5_idx'), + ), + migrations.AddIndex( + model_name='scan', + index=models.Index(fields=['website', '-created_at'], name='scans_website_6dae4d_idx'), + ), + ] diff --git a/backend/websites/migrations/__init__.py b/backend/websites/migrations/__init__.py new file mode 100644 index 0000000..e69de29