# secure-web/backend/tests/test_scanner_parsing.py
"""
Tests for scanner result parsing.
"""
import pytest
# Sample Lighthouse JSON response
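# Trimmed fixture: category scores are 0-100 integers, while per-metric and
# per-issue 'score' fields are 0-1 floats.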
SAMPLE_LIGHTHOUSE_RESPONSE = {
"scanId": "test-123",
"url": "https://example.com",
"scores": {
"performance": 85,
"accessibility": 90,
"bestPractices": 75,
"seo": 80
},
"metrics": {
"firstContentfulPaint": {"value": 1200, "unit": "ms", "score": 0.9},
"largestContentfulPaint": {"value": 2500, "unit": "ms", "score": 0.75},
"speedIndex": {"value": 3400, "unit": "ms", "score": 0.7},
"timeToInteractive": {"value": 4500, "unit": "ms", "score": 0.65},
"totalBlockingTime": {"value": 150, "unit": "ms", "score": 0.85},
"cumulativeLayoutShift": {"value": 0.1, "unit": "score", "score": 0.95}
},
"resources": {
"totalByteWeight": 2500000,
"unusedJavascript": [
{"url": "https://example.com/bundle.js", "wastedBytes": 150000}
],
"renderBlockingResources": [
{"url": "https://example.com/styles.css", "wastedMs": 500}
]
},
"diagnostics": {
"numRequests": 45,
"numScripts": 12,
"numStylesheets": 3,
"numImages": 20
},
"issues": [
{
"id": "uses-long-cache-ttl",
"category": "performance",
"title": "Serve static assets with an efficient cache policy",
"description": "A long cache lifetime can speed up repeat visits.",
"score": 0.3,
"impact": 5
}
]
}
class TestLighthouseResultParsing:
"""Tests for parsing Lighthouse scanner results."""
def test_parse_scores(self):
"""Test extracting scores from Lighthouse response."""
scores = SAMPLE_LIGHTHOUSE_RESPONSE['scores']
assert scores['performance'] == 85
assert scores['accessibility'] == 90
assert scores['bestPractices'] == 75
assert scores['seo'] == 80
def test_parse_core_web_vitals(self):
"""Test extracting Core Web Vitals metrics."""
metrics = SAMPLE_LIGHTHOUSE_RESPONSE['metrics']
# FCP
assert metrics['firstContentfulPaint']['value'] == 1200
assert metrics['firstContentfulPaint']['unit'] == 'ms'
# LCP
assert metrics['largestContentfulPaint']['value'] == 2500
# CLS
assert metrics['cumulativeLayoutShift']['value'] == 0.1
assert metrics['cumulativeLayoutShift']['unit'] == 'score'
def test_parse_resource_metrics(self):
"""Test extracting resource metrics."""
resources = SAMPLE_LIGHTHOUSE_RESPONSE['resources']
diagnostics = SAMPLE_LIGHTHOUSE_RESPONSE['diagnostics']
assert resources['totalByteWeight'] == 2500000
assert diagnostics['numRequests'] == 45
assert diagnostics['numScripts'] == 12
def test_parse_issues(self):
"""Test extracting issues from Lighthouse."""
issues = SAMPLE_LIGHTHOUSE_RESPONSE['issues']
assert len(issues) == 1
issue = issues[0]
assert issue['category'] == 'performance'
assert issue['title'] == 'Serve static assets with an efficient cache policy'
# Sample ZAP response
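# ZAP-style alerts: 'risk' is a numeric string code ('0' info, '1' low, '2' medium,
# '3' high); 'cweid' and 'wascid' are CWE / WASC identifiers when ZAP provides them.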
SAMPLE_ZAP_ALERTS = [
{
"alert": "Cross-Site Scripting (Reflected)",
"risk": "3", # High
"confidence": "2",
"cweid": "79",
"wascid": "8",
"description": "Cross-site Scripting (XSS) is an attack technique...",
"url": "https://example.com/search?q=test",
"param": "q",
"evidence": "<script>alert(1)</script>",
"solution": "Phase: Architecture and Design\nUse a vetted library...",
"reference": "https://owasp.org/www-community/attacks/xss/"
},
{
"alert": "Missing Anti-clickjacking Header",
"risk": "2", # Medium
"confidence": "3",
"cweid": "1021",
"wascid": "15",
"description": "The response does not include X-Frame-Options...",
"url": "https://example.com/",
"solution": "Ensure X-Frame-Options HTTP header is included...",
"reference": "https://owasp.org/www-community/Security_Headers"
},
{
"alert": "Server Leaks Information",
"risk": "1", # Low
"confidence": "3",
"cweid": "200",
"description": "The web/application server is leaking information...",
"url": "https://example.com/",
"evidence": "nginx/1.18.0",
"solution": "Configure the server to hide version information."
},
{
"alert": "Information Disclosure",
"risk": "0", # Info
"confidence": "2",
"description": "This is an informational finding.",
"url": "https://example.com/"
}
]
class TestZAPResultParsing:
"""Tests for parsing OWASP ZAP results."""
def test_parse_alert_severity(self):
"""Test mapping ZAP risk levels to severity."""
risk_mapping = {
'0': 'info',
'1': 'low',
'2': 'medium',
'3': 'high',
}
        # Every sample alert should carry a risk code covered by the mapping.
        for alert in SAMPLE_ZAP_ALERTS:
            assert alert['risk'] in risk_mapping
        # Spot-check the code-to-severity mapping against known alerts.
        assert risk_mapping[SAMPLE_ZAP_ALERTS[0]['risk']] == 'high'
        assert risk_mapping[SAMPLE_ZAP_ALERTS[3]['risk']] == 'info'
def test_parse_xss_alert(self):
"""Test parsing XSS vulnerability alert."""
xss_alert = SAMPLE_ZAP_ALERTS[0]
assert xss_alert['alert'] == 'Cross-Site Scripting (Reflected)'
assert xss_alert['risk'] == '3' # High
assert xss_alert['cweid'] == '79' # XSS CWE ID
        assert xss_alert['param'] == 'q'
def test_parse_header_alert(self):
"""Test parsing missing header alert."""
header_alert = SAMPLE_ZAP_ALERTS[1]
        assert 'Anti-clickjacking' in header_alert['alert']
        assert 'X-Frame-Options' in header_alert['description']
assert header_alert['risk'] == '2' # Medium
def test_categorize_alerts(self):
"""Test categorizing ZAP alerts."""
        def categorize(alert_name):
            # Header-related alerts get their own category; XSS, cookie, and all
            # other alerts fall back to the generic 'security' bucket.
            alert_lower = alert_name.lower()
            if 'header' in alert_lower or 'x-frame' in alert_lower:
                return 'headers'
            return 'security'
assert categorize(SAMPLE_ZAP_ALERTS[0]['alert']) == 'security'
assert categorize(SAMPLE_ZAP_ALERTS[1]['alert']) == 'headers'
# Sample HTTP headers response
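# HTTP header names are case-insensitive, so the tests below lowercase keys before lookups.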
SAMPLE_HEADERS = {
'content-type': 'text/html; charset=utf-8',
'server': 'nginx/1.18.0',
'x-powered-by': 'Express',
'strict-transport-security': 'max-age=31536000; includeSubDomains',
'x-content-type-options': 'nosniff',
'x-frame-options': 'SAMEORIGIN',
# Missing: Content-Security-Policy, Referrer-Policy, Permissions-Policy
}
class TestHeadersResultParsing:
"""Tests for parsing HTTP headers analysis."""
REQUIRED_HEADERS = [
'strict-transport-security',
'content-security-policy',
'x-frame-options',
'x-content-type-options',
'referrer-policy',
'permissions-policy',
]
def test_detect_present_headers(self):
"""Test detecting which security headers are present."""
headers_lower = {k.lower(): v for k, v in SAMPLE_HEADERS.items()}
present = [h for h in self.REQUIRED_HEADERS if h in headers_lower]
assert 'strict-transport-security' in present
assert 'x-frame-options' in present
assert 'x-content-type-options' in present
def test_detect_missing_headers(self):
"""Test detecting which security headers are missing."""
headers_lower = {k.lower(): v for k, v in SAMPLE_HEADERS.items()}
missing = [h for h in self.REQUIRED_HEADERS if h not in headers_lower]
assert 'content-security-policy' in missing
assert 'referrer-policy' in missing
assert 'permissions-policy' in missing
def test_detect_information_disclosure(self):
"""Test detecting information disclosure headers."""
info_disclosure_headers = ['server', 'x-powered-by', 'x-aspnet-version']
disclosed = [
h for h in info_disclosure_headers
if h.lower() in {k.lower() for k in SAMPLE_HEADERS.keys()}
]
assert 'server' in disclosed
assert 'x-powered-by' in disclosed
def test_check_hsts_max_age(self):
"""Test checking HSTS max-age value."""
hsts = SAMPLE_HEADERS.get('strict-transport-security', '')
# Extract max-age
        # The header must advertise a max-age; a missing directive should fail the test.
        assert 'max-age=' in hsts.lower()
        max_age = int(hsts.lower().split('max-age=')[1].split(';')[0])
        # Should be at least 1 year (31536000 seconds)
        assert max_age >= 31536000
class TestScannerResultIntegration:
"""Integration tests for combining scanner results."""
def test_aggregate_scores(self):
"""Test aggregating scores from multiple scanners."""
lighthouse_scores = SAMPLE_LIGHTHOUSE_RESPONSE['scores']
# Simulate security score from ZAP findings
security_score = 100
for alert in SAMPLE_ZAP_ALERTS:
risk = alert['risk']
if risk == '3':
security_score -= 15 # High
elif risk == '2':
security_score -= 8 # Medium
elif risk == '1':
security_score -= 3 # Low
else:
security_score -= 1 # Info
security_score = max(0, security_score)
# Calculate overall (simplified)
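        # Weights sum to 1.0 (0.25 + 0.30 + 3 * 0.15), keeping the overall score on a 0-100 scale.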
overall = (
lighthouse_scores['performance'] * 0.25 +
security_score * 0.30 +
lighthouse_scores['accessibility'] * 0.15 +
lighthouse_scores['seo'] * 0.15 +
lighthouse_scores['bestPractices'] * 0.15
)
assert 0 <= overall <= 100
def test_combine_issues(self):
"""Test combining issues from multiple scanners."""
# Lighthouse issues
lighthouse_issues = [
{
'category': 'performance',
'severity': 'medium',
'tool': 'lighthouse',
'title': issue['title']
}
for issue in SAMPLE_LIGHTHOUSE_RESPONSE['issues']
]
# ZAP issues
risk_to_severity = {'0': 'info', '1': 'low', '2': 'medium', '3': 'high'}
zap_issues = [
{
'category': 'security',
'severity': risk_to_severity[alert['risk']],
'tool': 'owasp_zap',
'title': alert['alert']
}
for alert in SAMPLE_ZAP_ALERTS
]
# Header issues
headers_lower = {k.lower(): v for k, v in SAMPLE_HEADERS.items()}
header_issues = [
{
'category': 'headers',
'severity': 'high' if h == 'content-security-policy' else 'medium',
'tool': 'header_check',
'title': f'Missing {h} header'
}
for h in ['content-security-policy', 'referrer-policy', 'permissions-policy']
if h not in headers_lower
]
all_issues = lighthouse_issues + zap_issues + header_issues
assert len(all_issues) > 0
# Count by severity
severity_counts = {}
for issue in all_issues:
severity = issue['severity']
severity_counts[severity] = severity_counts.get(severity, 0) + 1
assert 'high' in severity_counts or 'medium' in severity_counts