#!/usr/bin/env python3
"""Validate and report on test Category traits across the codebase.

Sprint: SPRINT_20251226_007_CICD

This script scans all test files in the codebase and reports:
1. Test files with Category traits
2. Test files missing Category traits
3. Coverage percentage by module

Usage:
    python devops/scripts/validate-test-traits.py [--fix] [--module <name>]

Options:
    --fix        Attempt to add default Unit trait to tests without categories
    --module     Only process tests in the specified module
    --verbose    Show detailed output
    --json       Output as JSON for CI consumption
"""

import os
import re
import sys
import json
import argparse
from pathlib import Path
from dataclasses import dataclass, field
from typing import List, Dict, Set, Optional

# Categories the repository's test taxonomy recognizes; anything else found
# in a Trait("Category", ...) attribute is reported as invalid.
VALID_CATEGORIES = {
    "Unit", "Integration", "Architecture", "Contract", "Security", "Golden",
    "Performance", "Benchmark", "AirGap", "Chaos", "Determinism",
    "Resilience", "Observability", "Property", "Snapshot", "Live",
}

# Patterns to identify test methods and classes
FACT_PATTERN = re.compile(r'\[Fact[^\]]*\]')
THEORY_PATTERN = re.compile(r'\[Theory[^\]]*\]')
# Match both string literals and TestCategories.Xxx constants.
# Also match inline format like [Fact, Trait("Category", ...)].
# Two alternative capture groups: group 1 = string literal, group 2 = constant.
TRAIT_CATEGORY_PATTERN = re.compile(
    r'Trait\s*\(\s*["\']Category["\']\s*,\s*(?:["\'](\w+)["\']|TestCategories\.(\w+))\s*\)'
)
TEST_CLASS_PATTERN = re.compile(r'public\s+(?:sealed\s+)?class\s+\w+.*Tests?\b')


@dataclass
class TestFileAnalysis:
    """Per-file scan result for one C# test file."""
    path: str
    has_facts: bool = False
    has_theories: bool = False
    has_category_traits: bool = False
    categories_found: Set[str] = field(default_factory=set)
    test_method_count: int = 0
    # Number of Trait("Category", ...) attributes found, not of test methods;
    # a method with several traits counts once per trait.
    categorized_test_count: int = 0


def analyze_test_file(file_path: Path) -> TestFileAnalysis:
    """Analyze a single test file for Category traits.

    Unreadable files produce a warning on stderr and an empty analysis
    (scan is best-effort; one bad file should not abort the report).
    """
    analysis = TestFileAnalysis(path=str(file_path))

    try:
        content = file_path.read_text(encoding='utf-8', errors='ignore')
    except Exception as e:
        print(f"Warning: Could not read {file_path}: {e}", file=sys.stderr)
        return analysis

    # Check for test methods
    facts = FACT_PATTERN.findall(content)
    theories = THEORY_PATTERN.findall(content)
    analysis.has_facts = len(facts) > 0
    analysis.has_theories = len(theories) > 0
    analysis.test_method_count = len(facts) + len(theories)

    # Check for Category traits
    category_matches = TRAIT_CATEGORY_PATTERN.findall(content)
    if category_matches:
        analysis.has_category_traits = True
        # Pattern has two capture groups - one for string literal, one for
        # constant. Exactly one is non-empty per match; keep that one.
        categories = set()
        for match in category_matches:
            cat = match[0] or match[1]
            if cat:
                categories.add(cat)
        analysis.categories_found = categories
        analysis.categorized_test_count = len(category_matches)

    return analysis


def get_module_from_path(file_path: Path) -> str:
    """Extract the module name from a file path.

    Returns the path component immediately after the first ``src`` segment
    (e.g. ``src/Scanner/...`` -> ``Scanner``), or ``"Unknown"`` when no
    ``src`` segment is present.

    NOTE(review): the original special-cased components starting with '__'
    (e.g. __Tests, __Libraries) but both branches returned the same value,
    so the effective behavior is simply "the component after src".
    """
    parts = file_path.parts
    for i, part in enumerate(parts):
        if part == 'src' and i + 1 < len(parts):
            return parts[i + 1]
    return "Unknown"


def find_test_files(root_path: Path, module_filter: Optional[str] = None) -> List[Path]:
    """Find all C# test files under *root_path*.

    Skips build output (obj/, bin/) and node_modules, optionally filters by
    module name (case-insensitive), and returns each file at most once even
    though the glob patterns overlap.
    """
    seen: Set[Path] = set()
    test_files: List[Path] = []
    for pattern in ['**/*.Tests.cs', '**/*Test.cs', '**/*Tests/*.cs']:
        for file_path in root_path.glob(pattern):
            # The three patterns overlap (e.g. Foo.Tests.cs inside a *Tests/
            # directory); count each file once.
            if file_path in seen:
                continue
            # Skip generated files. Compare path *components* rather than a
            # substring of str(path) so this also works on Windows, where
            # the separator is a backslash.
            if 'obj' in file_path.parts or 'bin' in file_path.parts:
                continue
            if 'node_modules' in file_path.parts:
                continue
            # Apply module filter if specified
            if module_filter:
                module = get_module_from_path(file_path)
                if module.lower() != module_filter.lower():
                    continue
            seen.add(file_path)
            test_files.append(file_path)
    return test_files


def generate_report(analyses: List[TestFileAnalysis], verbose: bool = False) -> Dict:
    """Generate a summary report dict from per-file analyses.

    The report contains a 'summary' section (counts, coverage percent,
    categories in use, and any categories outside VALID_CATEGORIES), a
    'by_module' breakdown, and — when *verbose* — the list of files missing
    traits.
    """
    total_files = len(analyses)
    files_with_tests = [a for a in analyses if a.has_facts or a.has_theories]
    files_with_traits = [a for a in analyses if a.has_category_traits]
    files_missing_traits = [a for a in files_with_tests if not a.has_category_traits]

    # Group by module
    by_module: Dict[str, Dict] = {}
    for analysis in analyses:
        module = get_module_from_path(Path(analysis.path))
        if module not in by_module:
            by_module[module] = {
                'total': 0,
                'with_tests': 0,
                'with_traits': 0,
                'missing_traits': 0,
                'files_missing': []
            }
        by_module[module]['total'] += 1
        if analysis.has_facts or analysis.has_theories:
            by_module[module]['with_tests'] += 1
        if analysis.has_category_traits:
            by_module[module]['with_traits'] += 1
        else:
            if analysis.has_facts or analysis.has_theories:
                by_module[module]['missing_traits'] += 1
                if verbose:
                    by_module[module]['files_missing'].append(analysis.path)

    # Coverage = share of files-with-tests that carry at least one trait.
    coverage = (len(files_with_traits) / len(files_with_tests) * 100) if files_with_tests else 0

    # Collect all categories found
    all_categories: Set[str] = set()
    for analysis in analyses:
        all_categories.update(analysis.categories_found)

    return {
        'summary': {
            'total_test_files': total_files,
            'files_with_tests': len(files_with_tests),
            'files_with_category_traits': len(files_with_traits),
            'files_missing_traits': len(files_missing_traits),
            'coverage_percent': round(coverage, 1),
            'categories_used': sorted(all_categories),
            # Categories in use that are not part of the recognized taxonomy.
            'invalid_categories': sorted(all_categories - VALID_CATEGORIES),
            'valid_categories': sorted(VALID_CATEGORIES),
        },
        'by_module': by_module,
        'files_missing_traits': [a.path for a in files_missing_traits] if verbose else []
    }


def add_default_trait(file_path: Path, default_category: str = "Unit") -> bool:
    """Add a default Category trait to every [Fact]/[Theory] in *file_path*.

    Files that already contain any Category trait are left untouched.
    Returns True if the file was modified, False otherwise (including on
    read/write errors, which are reported to stderr).
    """
    try:
        content = file_path.read_text(encoding='utf-8')
        original = content

        # Check if file already has Category traits anywhere; if so, assume
        # the author categorized deliberately and skip the whole file.
        if TRAIT_CATEGORY_PATTERN.search(content):
            return False

        # Ensure the TestCategories constants are in scope: insert the using
        # directive after the last existing using statement.
        if 'using StellaOps.TestKit;' not in content:
            using_pattern = re.compile(r'(using [^;]+;\s*\n)(?!using)')
            matches = list(using_pattern.finditer(content))
            if matches:
                insert_pos = matches[-1].end()
                content = (content[:insert_pos]
                           + 'using StellaOps.TestKit;\n'
                           + content[insert_pos:])

        # Insert the trait on its own line directly above each bare [Fact]
        # or [Theory] attribute, reusing the attribute's own indentation so
        # the added line lines up with the code it annotates.
        content = re.sub(
            r'^([ \t]*)(\[(?:Fact|Theory)\])',
            rf'\1[Trait("Category", TestCategories.{default_category})]\n\1\2',
            content,
            flags=re.MULTILINE,
        )

        if content != original:
            file_path.write_text(content, encoding='utf-8')
            return True
        return False

    except Exception as e:
        print(f"Error processing {file_path}: {e}", file=sys.stderr)
        return False


def main():
    parser = argparse.ArgumentParser(description='Validate test Category traits')
    parser.add_argument('--fix', action='store_true',
                        help='Add default Unit trait to tests without categories')
    parser.add_argument('--module', type=str,
                        help='Only process tests in the specified module')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Show detailed output')
    parser.add_argument('--json', action='store_true',
                        help='Output as JSON')
    parser.add_argument('--category', type=str, default='Unit',
                        help='Default category for --fix (default: Unit)')
    parser.add_argument('--threshold', type=float, default=80.0,
                        help='Minimum coverage percent before exiting non-zero (default: 80)')
    args = parser.parse_args()

    # Find repository root (script lives at devops/scripts/, two dirs down).
    script_path = Path(__file__).resolve()
    repo_root = script_path.parent.parent.parent
    src_path = repo_root / 'src'

    if not src_path.exists():
        print(f"Error: src directory not found at {src_path}", file=sys.stderr)
        sys.exit(1)

    # Find all test files
    test_files = find_test_files(src_path, args.module)
    if not args.json:
        print(f"Found {len(test_files)} test files to analyze...")

    # Analyze each file
    analyses = [analyze_test_file(f) for f in test_files]

    # Generate report
    report = generate_report(analyses, args.verbose)

    if args.json:
        print(json.dumps(report, indent=2))
    else:
        # Print summary
        summary = report['summary']
        print("\n" + "=" * 60)
        print("TEST CATEGORY TRAIT COVERAGE REPORT")
        print("=" * 60)
        print(f"Total test files: {summary['total_test_files']}")
        print(f"Files with test methods: {summary['files_with_tests']}")
        print(f"Files with Category trait: {summary['files_with_category_traits']}")
        print(f"Files missing traits: {summary['files_missing_traits']}")
        print(f"Coverage: {summary['coverage_percent']}%")
        print(f"\nCategories in use: {', '.join(summary['categories_used']) or 'None'}")
        print(f"Valid categories: {', '.join(summary['valid_categories'])}")
        if summary['invalid_categories']:
            print(f"WARNING: unrecognized categories in use: "
                  f"{', '.join(summary['invalid_categories'])}")

        # Print by module
        print("\n" + "-" * 60)
        print("BY MODULE")
        print("-" * 60)
        print(f"{'Module':<25} {'With Tests':<12} {'With Traits':<12} {'Missing':<10}")
        print("-" * 60)
        for module, data in sorted(report['by_module'].items()):
            if data['with_tests'] > 0:
                print(f"{module:<25} {data['with_tests']:<12} {data['with_traits']:<12} {data['missing_traits']:<10}")

        # Show files missing traits if verbose
        if args.verbose and report['files_missing_traits']:
            print("\n" + "-" * 60)
            print("FILES MISSING CATEGORY TRAITS")
            print("-" * 60)
            for f in sorted(report['files_missing_traits'])[:50]:  # Limit to first 50
                print(f"  {f}")
            if len(report['files_missing_traits']) > 50:
                print(f"  ... and {len(report['files_missing_traits']) - 50} more")

    # Fix mode
    if args.fix:
        files_to_fix = [Path(a.path) for a in analyses
                        if (a.has_facts or a.has_theories) and not a.has_category_traits]
        if not args.json:
            print(f"\n{'=' * 60}")
            print(f"FIXING {len(files_to_fix)} FILES WITH DEFAULT CATEGORY: {args.category}")
            print("=" * 60)
        fixed_count = 0
        for file_path in files_to_fix:
            if add_default_trait(file_path, args.category):
                fixed_count += 1
                if not args.json:
                    print(f"  Fixed: {file_path}")
        if not args.json:
            print(f"\nFixed {fixed_count} files")

    # Exit with error code if coverage is below threshold.
    # NOTE: this evaluates the pre-fix coverage even when --fix ran.
    if report['summary']['coverage_percent'] < args.threshold:
        sys.exit(1)

    sys.exit(0)


if __name__ == '__main__':
    main()