#!/usr/bin/env python3
"""
Post-Migration Validation Script
Validates data integrity between source and target Salesforce orgs
Generates comprehensive HTML reconciliation report
Author: Senior Developer, NULogic
Version: 1.0
"""

import os
import json
import logging
from datetime import datetime
from typing import Dict, List, Tuple, Any
from simple_salesforce import Salesforce
from collections import defaultdict

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('migration_validation.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)


class MigrationValidator:
    """Validate migrated data between orgs"""

    def __init__(self):
        """Initialize validator"""
        self.source_sf = None
        self.target_sf = None
        self.validation_results = {}
        self.validation_start_time = None
        self.critical_failures = []

    def connect_to_orgs(self, source_config: Dict, target_config: Dict) -> bool:
        """Connect to both source and target orgs"""
        try:
            self.source_sf = Salesforce(**source_config)
            logger.info(f'Connected to source org: {source_config["username"]}')

            self.target_sf = Salesforce(**target_config)
            logger.info(f'Connected to target org: {target_config["username"]}')

            return True
        except Exception as e:
            logger.error(f'Failed to connect to orgs: {str(e)}')
            return False

    def run_validation(self) -> bool:
        """Execute full validation suite"""
        self.validation_start_time = datetime.now()
        logger.info('Starting post-migration validation')

        try:
            self._validate_products()
            self._validate_pricebooks()
            self._validate_pricebook_entries()
            self._validate_orders()
            self._validate_order_items()
            self._validate_relationships()

            self._generate_html_report()

            if self.critical_failures:
                logger.warning(f'Validation completed with {len(self.critical_failures)} critical failures')
                return False

            logger.info('Validation completed successfully')
            return True

        except Exception as e:
            logger.error(f'Validation failed: {str(e)}')
            return False

    def _validate_products(self):
        """Validate product migration"""
        logger.info('Validating products...')

        # Count comparison
        source_count = self._get_source_count('ccrz__E_Product__c', 'ccrz__IsActive__c = true')
        target_count = self._get_target_count('Product2')

        result = {
            'object': 'Product2',
            'source_count': source_count,
            'target_count': target_count,
            'count_match': source_count == target_count,
            'issues': []
        }

        if source_count != target_count:
            result['issues'].append(f'Count mismatch: {source_count} source vs {target_count} target')
            self.critical_failures.append('Product count validation failed')

        # Sample validation
        sample_size = min(100, target_count)
        source_products = self._query_source(
            "SELECT ccrz__ProductCode__c, ccrz__ProductName__c FROM ccrz__E_Product__c "
            "WHERE ccrz__IsActive__c = true ORDER BY CreatedDate DESC LIMIT " + str(sample_size)
        )

        target_products = self._query_target(
            "SELECT ProductCode, Name FROM Product2 "
            "ORDER BY CreatedDate DESC LIMIT " + str(sample_size)
        )

        target_codes = set(p['ProductCode'] for p in target_products if p.get('ProductCode'))
        matched = sum(1 for p in source_products if p.get('ccrz__ProductCode__c') in target_codes)

        result['sample_size'] = sample_size
        result['matched'] = matched
        result['match_percent'] = (matched / sample_size * 100) if sample_size > 0 else 0

        if result['match_percent'] < 95:
            result['issues'].append(f'Sample validation failed: {result["match_percent"]:.1f}% matched')

        self.validation_results['products'] = result
        logger.info(f'Product validation: {matched}/{sample_size} matched')

    def _validate_pricebooks(self):
        """Validate pricebook migration"""
        logger.info('Validating pricebooks...')

        source_count = self._get_source_count('ccrz__E_PriceList__c', 'ccrz__IsActive__c = true')
        target_count = self._get_target_count('Pricebook2', 'WHERE IsStandard = false')

        result = {
            'object': 'Pricebook2',
            'source_count': source_count,
            'target_count': target_count,
            'count_match': source_count == target_count,
            'issues': []
        }

        if source_count != target_count:
            result['issues'].append(f'Count mismatch: {source_count} source vs {target_count} target')

        self.validation_results['pricebooks'] = result
        logger.info(f'Pricebook validation: {target_count} pricebooks in target')

    def _validate_pricebook_entries(self):
        """Validate pricing entries with accuracy checks"""
        logger.info('Validating pricebook entries...')

        source_count = self._get_source_count('ccrz__E_PriceListItem__c', 'ccrz__IsActive__c = true')
        target_count = self._get_target_count('PricebookEntry')

        result = {
            'object': 'PricebookEntry',
            'source_count': source_count,
            'target_count': target_count,
            'count_match': source_count == target_count,
            'issues': [],
            'pricing_accuracy': {}
        }

        if source_count != target_count:
            result['issues'].append(f'Count mismatch: {source_count} source vs {target_count} target')
            self.critical_failures.append('PricebookEntry count validation failed')

        # Sample-based pricing validation
        target_sample = self._query_target(
            'SELECT UnitPrice, Product2Id FROM PricebookEntry LIMIT 50'
        )

        total_price = sum(float(p.get('UnitPrice', 0)) for p in target_sample if p.get('UnitPrice'))
        avg_price = total_price / len(target_sample) if target_sample else 0

        result['pricing_accuracy'] = {
            'sample_size': len(target_sample),
            'total_value': total_price,
            'average_price': avg_price
        }

        if avg_price == 0:
            result['issues'].append('No valid prices found in sample')

        self.validation_results['pricebook_entries'] = result
        logger.info(f'PricebookEntry validation: {len(target_sample)} sampled for pricing')

    def _validate_orders(self):
        """Validate order migration"""
        logger.info('Validating orders...')

        source_count = self._get_source_count('ccrz__E_Order__c')
        target_count = self._get_target_count('Order')

        result = {
            'object': 'Order',
            'source_count': source_count,
            'target_count': target_count,
            'count_match': source_count == target_count,
            'issues': []
        }

        if source_count != target_count:
            result['issues'].append(f'Count mismatch: {source_count} source vs {target_count} target')
            self.critical_failures.append('Order count validation failed')

        # Check for orders without accounts
        sample = self._query_target(
            'SELECT Id, AccountId FROM Order LIMIT 100'
        )

        without_account = sum(1 for o in sample if not o.get('AccountId'))
        if without_account > 0:
            result['issues'].append(f'{without_account} orders missing AccountId')

        result['sample_size'] = len(sample)
        result['orders_without_account'] = without_account

        self.validation_results['orders'] = result
        logger.info(f'Order validation: {without_account} orders missing accounts')

    def _validate_order_items(self):
        """Validate order item migration"""
        logger.info('Validating order items...')

        source_count = self._get_source_count('ccrz__E_OrderItem__c')
        target_count = self._get_target_count('OrderItem')

        result = {
            'object': 'OrderItem',
            'source_count': source_count,
            'target_count': target_count,
            'count_match': source_count == target_count,
            'issues': []
        }

        if source_count != target_count:
            result['issues'].append(f'Count mismatch: {source_count} source vs {target_count} target')

        # Verify relationship integrity
        orphan_items = self._query_target(
            'SELECT COUNT() FROM OrderItem WHERE OrderId = null'
        )

        if orphan_items and orphan_items[0]['totalSize'] > 0:
            result['issues'].append(f'{orphan_items[0]["totalSize"]} order items without OrderId')

        self.validation_results['order_items'] = result
        logger.info(f'OrderItem validation: {target_count} items in target')

    def _validate_relationships(self):
        """Validate referential integrity across objects"""
        logger.info('Validating relationships...')

        result = {
            'object': 'Relationships',
            'issues': []
        }

        # Verify all products referenced in pricing exist
        pricing_items = self._query_target(
            'SELECT Product2Id FROM PricebookEntry GROUP BY Product2Id LIMIT 10000'
        )
        pricing_product_ids = set(p['Product2Id'] for p in pricing_items if p.get('Product2Id'))

        if pricing_product_ids:
            existing_products = self._query_target(
                f"SELECT Id FROM Product2 WHERE Id IN ('{\"','\"'
                '.join(pricing_product_ids)}') LIMIT 10000"
            )
            existing_ids = set(p['Id'] for p in existing_products)
            missing = pricing_product_ids - existing_ids

            if missing:
                result['issues'].append(f'{len(missing)} pricing items reference non-existent products')

        self.validation_results['relationships'] = result

    def _get_source_count(self, sobject: str, where_clause: str = '') -> int:
        """Get count from source org"""
        try:
            query = f"SELECT COUNT() FROM {sobject}"
            if where_clause:
                query += f" WHERE {where_clause}"

            result = self.source_sf.query_all(query)
            return result.get('totalSize', 0)
        except Exception as e:
            logger.error(f'Error counting {sobject} in source: {str(e)}')
            return 0

    def _get_target_count(self, sobject: str, where_clause: str = '') -> int:
        """Get count from target org"""
        try:
            query = f"SELECT COUNT() FROM {sobject} {where_clause}"
            result = self.target_sf.query_all(query)
            return result.get('totalSize', 0)
        except Exception as e:
            logger.error(f'Error counting {sobject} in target: {str(e)}')
            return 0

    def _query_source(self, query: str) -> List[Dict]:
        """Execute query against source org"""
        try:
            result = self.source_sf.query_all(query)
            return result.get('records', [])
        except Exception as e:
            logger.error(f'Source query error: {str(e)}')
            return []

    def _query_target(self, query: str) -> List[Dict]:
        """Execute query against target org"""
        try:
            result = self.target_sf.query_all(query)
            return result.get('records', [])
        except Exception as e:
            logger.error(f'Target query error: {str(e)}')
            return []

    def _generate_html_report(self):
        """Generate HTML validation report"""
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        report_path = f'migration_validation_report_{timestamp}.html'

        html_content = self._build_html_report()

        with open(report_path, 'w') as f:
            f.write(html_content)

        logger.info(f'HTML report generated: {report_path}')

    def _build_html_report(self) -> str:
        """Build HTML report content"""
        html = """
        <html>
        <head>
            <title>CloudCraze Migration Validation Report</title>
            <style>
                body { font-family: Arial, sans-serif; margin: 20px; }
                h1 { color: #333; }
                table { border-collapse: collapse; width: 100%; margin: 20px 0; }
                th, td { border: 1px solid #ddd; padding: 12px; text-align: left; }
                th { background-color: #4CAF50; color: white; }
                tr:nth-child(even) { background-color: #f2f2f2; }
                .pass { color: green; font-weight: bold; }
                .fail { color: red; font-weight: bold; }
                .warning { color: orange; font-weight: bold; }
                .issue { background-color: #ffe6e6; padding: 10px; margin: 10px 0; }
            </style>
        </head>
        <body>
            <h1>CloudCraze to Lightning Migration - Validation Report</h1>
            <p>Generated: {timestamp}</p>
        """.format(timestamp=datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

        if self.critical_failures:
            html += f"""
            <div style="background-color: #ffe6e6; padding: 20px; margin: 20px 0; border-radius: 5px;">
                <h2>Critical Failures</h2>
                <ul>
            """
            for failure in self.critical_failures:
                html += f"<li>{failure}</li>"
            html += "</ul></div>"

        html += """
            <h2>Validation Results by Object</h2>
            <table>
                <tr>
                    <th>Object</th>
                    <th>Source Count</th>
                    <th>Target Count</th>
                    <th>Status</th>
                    <th>Match %</th>
                    <th>Issues</th>
                </tr>
        """

        for object_name, result in self.validation_results.items():
            status = "PASS" if not result.get('issues') else "FAIL"
            status_class = "pass" if status == "PASS" else "fail"
            match_percent = result.get('match_percent', 100)

            issues_html = ""
            if result.get('issues'):
                for issue in result['issues']:
                    issues_html += f"<div class='issue'>{issue}</div>"

            html += f"""
                <tr>
                    <td><strong>{result.get('object', object_name)}</strong></td>
                    <td>{result.get('source_count', 'N/A')}</td>
                    <td>{result.get('target_count', 'N/A')}</td>
                    <td class='{status_class}'>{status}</td>
                    <td>{match_percent:.1f}%</td>
                    <td>{issues_html}</td>
                </tr>
            """

        html += """
            </table>
            <h2>Validation Summary</h2>
            <ul>
        """

        total_results = len(self.validation_results)
        passed = sum(1 for r in self.validation_results.values() if not r.get('issues'))

        html += f"""
                <li>Total Objects Validated: {total_results}</li>
                <li>Passed: {passed}</li>
                <li>Failed: {total_results - passed}</li>
                <li>Critical Failures: {len(self.critical_failures)}</li>
            </ul>
            <p><em>For detailed logs, see migration_validation.log</em></p>
        </body>
        </html>
        """

        return html


def main():
    """Main execution"""
    validator = MigrationValidator()

    source_config = {
        'client_id': os.getenv('SOURCE_CLIENT_ID'),
        'client_secret': os.getenv('SOURCE_CLIENT_SECRET'),
        'username': os.getenv('SOURCE_USERNAME'),
        'password': os.getenv('SOURCE_PASSWORD'),
        'sandbox': True
    }

    target_config = {
        'client_id': os.getenv('TARGET_CLIENT_ID'),
        'client_secret': os.getenv('TARGET_CLIENT_SECRET'),
        'username': os.getenv('TARGET_USERNAME'),
        'password': os.getenv('TARGET_PASSWORD'),
        'sandbox': False
    }

    if not validator.connect_to_orgs(source_config, target_config):
        logger.error('Failed to connect to orgs')
        return

    success = validator.run_validation()

    if not success:
        logger.error('Validation detected critical issues')


if __name__ == '__main__':
    main()