#!/usr/bin/env python3
"""
Ensure Complete Analysis - Fix Missing Scores
Ensures every property has BOTH GPT and Custom scores (or neither)
This should never be needed in a well-architected system!
"""

import json
import subprocess
import sys
from pathlib import Path

def _has_score(prop, key):
    """Return True when ``prop[key]`` is a number strictly greater than zero.

    A missing key, JSON ``null``, NaN, a non-numeric value, zero or a
    negative number all count as "no score".
    """
    value = prop.get(key)
    return isinstance(value, (int, float)) and value > 0


def check_completeness():
    """Report which properties in ``enriched_data.json`` are missing scores.

    Reads ``enriched_data.json`` from the current working directory, prints a
    summary of how many properties carry a GPT score, a Custom score, or both,
    and lists up to three sample URLs for each kind of gap.

    Every property falls into exactly one category: complete, missing GPT
    only, missing Custom only, or missing both.

    Returns:
        tuple: ``(True, [])`` when every property has both scores; otherwise
        ``(False, issues)`` where ``issues`` is a list of
        ``(issue_type, properties)`` pairs with ``issue_type`` one of
        ``'missing_gpt'``, ``'missing_custom'``, ``'missing_both'``
        (in that order, empty categories omitted).

    Raises:
        OSError: if ``enriched_data.json`` cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """

    print("="*70)
    print("🔍 CHECKING ANALYSIS COMPLETENESS")
    print("="*70)
    print()

    with open('enriched_data.json', encoding='utf-8') as f:
        properties = json.load(f)

    total = len(properties)
    with_gpt = with_custom = with_both = 0
    buckets = {'missing_gpt': [], 'missing_custom': [], 'missing_both': []}

    # Single pass: count scores and sort each property into one bucket.
    for prop in properties:
        has_gpt = _has_score(prop, 'gpt_score')
        has_custom = _has_score(prop, 'custom_score')
        with_gpt += has_gpt
        with_custom += has_custom
        if has_gpt and has_custom:
            with_both += 1
        elif has_custom:
            buckets['missing_gpt'].append(prop)
        elif has_gpt:
            buckets['missing_custom'].append(prop)
        else:
            buckets['missing_both'].append(prop)

    print(f"📊 Current State:")
    print(f"   Total properties: {total}")
    print(f"   With GPT scores: {with_gpt}")
    print(f"   With Custom scores: {with_custom}")
    print(f"   With BOTH scores: {with_both}")
    print()

    labels = (
        ('missing_gpt', 'MISSING GPT scores (have custom only)'),
        ('missing_custom', 'MISSING CUSTOM scores (have GPT only)'),
        ('missing_both', 'MISSING BOTH scores'),
    )

    issues = []
    for issue_type, label in labels:
        affected = buckets[issue_type]
        if not affected:
            continue
        issues.append((issue_type, affected))
        print(f"⚠️  {len(affected)} properties {label}")
        for p in affected[:3]:
            # .get so a record without a URL cannot abort the report.
            print(f"     • {p.get('url', '<no url>')}")
        if len(affected) > 3:
            print(f"     ... and {len(affected) - 3} more")
        print()

    if not issues:
        print("✅ All properties have complete analysis (both GPT and Custom scores)")
        return True, []

    return False, issues

def fix_missing_gpt(properties):
    """Queue properties that lack a GPT score for re-analysis.

    Appends the URL of each given property to ``extracted_property_urls.csv``
    (column ``URL``) unless it is already listed there. The CSV is created if
    it does not exist; an existing but empty file is treated as having no
    URLs. Duplicate URLs within ``properties`` are added only once, and
    properties without a ``url`` are skipped.

    Args:
        properties: iterable of property dicts, each expected to carry a
            ``'url'`` key.

    Returns:
        int: number of URLs actually appended to the CSV.
    """
    print("🔧 Fixing missing GPT scores...")
    print()

    import pandas as pd

    # Load current CSV
    csv_file = 'extracted_property_urls.csv'
    df = pd.DataFrame(columns=['URL'])
    if Path(csv_file).exists():
        try:
            df = pd.read_csv(csv_file)
        except pd.errors.EmptyDataError:
            # Zero-byte file: keep the empty frame with a URL column.
            pass
    existing_urls = set(df['URL'].tolist())

    # dict.fromkeys de-duplicates while preserving first-seen order.
    candidates = dict.fromkeys(p.get('url') for p in properties)
    urls_to_add = [url for url in candidates if url and url not in existing_urls]

    if urls_to_add:
        new_rows = pd.DataFrame({'URL': urls_to_add})
        df = pd.concat([df, new_rows], ignore_index=True)
        df.to_csv(csv_file, index=False)
        print(f"   ✓ Added {len(urls_to_add)} URLs to {csv_file}")
        print(f"   → Run 'Analyze Only' to process them")
    else:
        print(f"   ℹ️  All URLs already in {csv_file}")
        print(f"   → These properties may have failed GPT analysis")
        print(f"   → Try running 'Analyze Only' with cache disabled")

    return len(urls_to_add)

def fix_missing_custom(properties):
    """Run ``custom_criteria.py`` for properties that lack a Custom score.

    The evaluation is only started when every given property has both a
    ``lat`` and a ``lon`` value. A coordinate counts as missing when it is
    absent, ``None`` or an empty string; ``0`` / ``0.0`` is a valid
    coordinate and is accepted.

    ``custom_criteria.py`` is launched from the current working directory
    with the same interpreter that runs this script, with a 30 minute
    timeout.

    Args:
        properties: list of property dicts to check for GPS coordinates.

    Returns:
        bool: True when the subprocess exits with status 0; False when
        coordinates are missing, the subprocess fails, times out, or cannot
        be started.
    """
    print("🔧 Fixing missing Custom scores...")
    print()

    # Compare against None/'' explicitly: a plain truthiness test would
    # wrongly reject 0.0 (equator / prime meridian).
    without_gps = [
        p for p in properties
        if p.get('lat') in (None, '') or p.get('lon') in (None, '')
    ]

    if without_gps:
        print(f"   ⚠️  {len(without_gps)} properties missing GPS coordinates")
        print(f"   → These need geocoding first before custom criteria can run")
        return False

    print(f"   ✓ All {len(properties)} properties have GPS coordinates")
    print(f"   → Running custom criteria evaluation...")
    print()

    # Run custom_criteria.py with the current interpreter so the child uses
    # the same environment (virtualenv, platform without a `python3` binary).
    try:
        result = subprocess.run(
            [sys.executable, 'custom_criteria.py'],
            capture_output=True,
            text=True,
            timeout=1800  # 30 min timeout
        )
    except subprocess.TimeoutExpired:
        print("   ⏱️  Custom criteria evaluation timed out")
        return False
    except OSError as e:
        # Interpreter could not be launched (missing, not executable, ...).
        print(f"   ❌ Error: {e}")
        return False

    if result.returncode == 0:
        print("   ✅ Custom criteria evaluation completed")
        return True

    print(f"   ❌ Custom criteria evaluation failed")
    print(f"   Error: {result.stderr[:500]}")
    return False

def main():
    """Check score completeness, attempt automatic fixes, and report the result.

    Workflow:
        1. Run ``check_completeness()``; return immediately if nothing is missing.
        2. For each reported issue type, call the matching fixer
           (``fix_missing_gpt`` / ``fix_missing_custom``) or print the manual
           step required for properties missing both scores.
        3. Re-run ``check_completeness()``. If everything is now complete,
           run ``sync_gpt_results.py`` as a best-effort step; otherwise list
           the manual actions still required.

    Returns:
        int: process exit status — 0 when all properties have complete
        analysis (initially or after fixes), 1 when issues remain.
    """
    print()
    print("This script ensures all properties have BOTH GPT and Custom scores")
    print("In a properly architected system, this would never be needed!")
    print()

    # Check current state
    is_complete, issues = check_completeness()

    if is_complete:
        print("="*70)
        print("✅ NO ACTION NEEDED - All properties have complete analysis")
        print("="*70)
        return 0

    print("="*70)
    print("🔧 FIXING INCOMPLETE ANALYSIS")
    print("="*70)
    print()

    # Fix each type of issue
    for issue_type, props in issues:
        if issue_type == 'missing_gpt':
            added = fix_missing_gpt(props)
            if added > 0:
                print(f"   ➡️  Next: Click 'Analyze Only' in Criteria Manager")
            print()

        elif issue_type == 'missing_custom':
            fix_missing_custom(props)
            print()

        elif issue_type == 'missing_both':
            print(f"⚠️  {len(props)} properties need BOTH analyses")
            print(f"   → Run 'Full Update' to analyze them completely")
            print()

    # Re-check after fixes
    print("="*70)
    print("🔍 RE-CHECKING AFTER FIXES")
    print("="*70)
    print()

    is_complete_now, remaining_issues = check_completeness()

    if is_complete_now:
        print("="*70)
        print("✅ SUCCESS - All properties now have complete analysis!")
        print("="*70)

        # Sync results to enriched_data.json
        print()
        print("🔄 Syncing results to enriched_data.json...")
        # Best-effort step: a failed sync is reported but does not change the
        # exit status, since the completeness check has already passed.
        try:
            result = subprocess.run(
                [sys.executable, 'sync_gpt_results.py'],
                capture_output=True,
                text=True,
            )
        except OSError as e:
            print(f"   ⚠️  Could not run sync_gpt_results.py: {e}")
        else:
            print(result.stdout)
            if result.returncode != 0:
                print(f"   ⚠️  sync_gpt_results.py exited with code {result.returncode}")
                print(f"   Error: {result.stderr[:500]}")

        return 0
    else:
        print("="*70)
        print("⚠️  SOME ISSUES REMAIN")
        print("="*70)
        print()
        print("Manual actions needed:")
        if any(t == 'missing_gpt' for t, _ in remaining_issues):
            print("  1. Click 'Analyze Only' in Criteria Manager")
        if any(t == 'missing_custom' for t, _ in remaining_issues):
            print("  2. Run 'python3 custom_criteria.py' or click 'Recalculate' in Custom Criteria tab")
        if any(t == 'missing_both' for t, _ in remaining_issues):
            print("  3. Run 'Full Update' for properties missing everything")

        return 1

if __name__ == '__main__':
    # Script entry point: run main() and turn its outcome into the process
    # exit status. Ctrl-C and unexpected errors both exit with status 1; the
    # latter also prints a traceback for diagnosis.
    try:
        exit_status = main()
    except KeyboardInterrupt:
        print("\n\n⚠️  Interrupted by user")
        exit_status = 1
    except Exception as err:
        print(f"\n\n❌ Unexpected error: {err}")
        import traceback
        traceback.print_exc()
        exit_status = 1
    sys.exit(exit_status)