#!/usr/bin/env python3
"""
Sync GPT analysis results from analysis_output.csv into enriched_data.json.

This is a temporary fix until analyze_from_urls_optimized.py writes directly
to enriched_data.json.
"""

import json
import os
import sys

import pandas as pd

from parse_criteria import extract_criteria_scores, extract_risk_profile

def _build_analysis_index(df_analysis):
    """Map each URL to its latest usable analysis row.

    A row is usable when it has a URL, a textual GPT analysis and a numeric,
    non-zero weighted score. Rows are visited in file order, so a later row
    for the same URL overwrites an earlier one.

    Args:
        df_analysis: DataFrame holding the 'URL', 'GPT Analyse' and
            'Gewogen Score' columns.

    Returns:
        dict mapping URL -> {'analysis': str, 'weighted_score': number}.
    """
    # Coerce to numbers so blank or malformed scores become NaN and can be
    # skipped, instead of leaking NaN/strings into the JSON output.
    scores = pd.to_numeric(df_analysis['Gewogen Score'], errors='coerce')

    # .tolist() yields plain Python scalars, which json can serialise.
    rows = zip(
        df_analysis['URL'].tolist(),
        df_analysis['GPT Analyse'].tolist(),
        scores.tolist(),
    )

    analysis_by_url = {}
    for url, analysis, score in rows:
        # A score of 0 marks "no analysis"; a missing score is treated the same.
        if pd.isna(url) or pd.isna(score) or score == 0:
            continue
        # An empty CSV cell is read as NaN (a float); without analysis text
        # there is nothing to parse or store.
        if not isinstance(analysis, str):
            continue
        # Always use the latest row (overwrite any previous one).
        analysis_by_url[url] = {'analysis': analysis, 'weighted_score': score}
    return analysis_by_url


def _write_json_atomically(path, data):
    """Serialise *data* to *path* without ever leaving a half-written file.

    The JSON is written to a sibling temp file first and then moved over the
    target, so a failure while serialising leaves the original file intact.

    Raises:
        OSError, TypeError, ValueError: propagated from open/json.dump/replace.
    """
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temp file is already gone; this only
        # cleans up the leftovers of a failed write.
        try:
            os.remove(tmp_path)
        except FileNotFoundError:
            pass


def sync_gpt_results(json_path: str = 'enriched_data.json',
                     csv_path: str = 'analysis_output.csv') -> bool:
    """Sync GPT results from the analysis CSV into the enriched JSON file.

    For every property whose 'url' matches a usable CSV row, this sets
    'gpt_score', 'analysis', 'criteria', optionally 'risk_profile', and
    recomputes 'overall_score' as 60% GPT score + 40% custom score. The
    criteria and risk profile come from the parse_criteria helpers; their
    return shapes are not visible here and are stored as-is.

    Args:
        json_path: JSON file containing a list of property objects; it is
            rewritten in place.
        csv_path: CSV file with 'URL', 'GPT Analyse' and 'Gewogen Score'
            columns.

    Returns:
        True when the JSON file was saved, False when loading, validating
        or saving failed (the reason is printed).
    """
    required_columns = ('URL', 'GPT Analyse', 'Gewogen Score')
    gpt_weight, custom_weight = 0.6, 0.4

    # Load both files
    print("📊 Loading data files...")

    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            properties = json.load(f)
    except (OSError, ValueError) as e:  # JSONDecodeError is a ValueError
        print(f"   ✗ Error loading {json_path}: {e}")
        return False
    if not isinstance(properties, list) or not all(
            isinstance(prop, dict) for prop in properties):
        print(f"   ✗ Error loading {json_path}: expected a list of property objects")
        return False
    print(f"   ✓ Loaded {len(properties)} properties from {json_path}")

    try:
        df_analysis = pd.read_csv(csv_path)
    except (OSError, ValueError) as e:  # pandas parser errors are ValueErrors
        print(f"   ✗ Error loading {csv_path}: {e}")
        return False
    missing_columns = [c for c in required_columns if c not in df_analysis.columns]
    if missing_columns:
        print(f"   ✗ Error loading {csv_path}: missing columns {missing_columns}")
        return False
    print(f"   ✓ Loaded {len(df_analysis)} analyses from {csv_path}")

    analysis_by_url = _build_analysis_index(df_analysis)
    print(f"   ✓ Found {len(analysis_by_url)} unique URLs with scores > 0")

    # Update properties
    print(f"\n🔄 Syncing GPT results into {json_path}...")
    updated = 0
    already_scored = 0
    pending = 0

    for prop in properties:
        url = prop.get('url')
        # Only string URLs can match a CSV row; this also avoids hashing
        # unexpected (e.g. list) values.
        analysis_data = analysis_by_url.get(url) if isinstance(url, str) else None

        if analysis_data is None:
            # No new analysis for this property: it either keeps a score from
            # an earlier run or is still waiting to be analysed.
            if prop.get('gpt_score'):
                already_scored += 1
            else:
                pending += 1
            continue

        gpt_analysis = analysis_data['analysis']
        new_gpt_score = analysis_data['weighted_score']
        old_gpt_score = prop.get('gpt_score', 0)

        prop['gpt_score'] = new_gpt_score
        prop['analysis'] = gpt_analysis
        prop['criteria'] = extract_criteria_scores(gpt_analysis)
        risk_profile = extract_risk_profile(gpt_analysis)
        if risk_profile:
            # Keep any existing risk profile when none could be extracted.
            prop['risk_profile'] = risk_profile

        # Recalculate overall_score (60% GPT, 40% custom); a missing or null
        # custom score counts as 0.
        custom_score = prop.get('custom_score') or 0
        prop['overall_score'] = round(
            new_gpt_score * gpt_weight + custom_score * custom_weight, 2)

        updated += 1
        if updated <= 5:  # Show first 5
            print(f"   ✓ {prop.get('location', 'Unknown')}: gpt_score {old_gpt_score} → {new_gpt_score}")

    # Save updated data
    print(f"\n💾 Saving updated {json_path}...")
    try:
        _write_json_atomically(json_path, properties)
    except (OSError, TypeError, ValueError) as e:
        print(f"   ✗ Error saving: {e}")
        return False
    print("   ✓ Saved successfully")

    # Summary
    print("\n" + "="*60)
    print("📊 SYNC SUMMARY")
    print("="*60)
    print(f"Total properties: {len(properties)}")
    print(f"Updated with GPT scores: {updated}")
    print(f"Already had scores: {already_scored}")
    print(f"Pending analysis: {pending}")
    print("="*60)

    return True

if __name__ == '__main__':
    # Script entry point: the process exit status mirrors the sync outcome
    # (0 when sync_gpt_results() returns a truthy value, 1 otherwise).
    exit_code = 0 if sync_gpt_results() else 1
    sys.exit(exit_code)
