#!/usr/bin/env python3
"""
Sync new properties from extracted_property_urls.csv into enriched_data.json
This ensures that all scraped favorites are present in enriched_data.json
before the availability check and analysis steps run.
"""
import json
import pandas as pd
from pathlib import Path
from datetime import datetime

def _parse_price(price_raw):
    """Convert a raw CSV price cell into an integer amount, or None.

    String cells are reduced to their digits (so "€ 450.000 k.k." becomes
    450000); numeric cells are truncated with int(). Missing cells,
    unparseable values and amounts outside 10,000-10,000,000 yield None.
    """
    if pd.isna(price_raw):
        return None
    try:
        if isinstance(price_raw, str):
            digits = ''.join(filter(str.isdigit, price_raw))
            price = int(digits) if digits else None
        else:
            price = int(price_raw)
    except (TypeError, ValueError, OverflowError):
        return None
    # Sanity check (reasonable price range); also rejects 0 and negatives.
    if price is None or not (10000 <= price <= 10000000):
        return None
    return price


def _value_or_default(value, default):
    """Return *value*, or *default* when the cell is missing (None/NaN)."""
    return default if pd.isna(value) else value


def sync_csv_to_enriched():
    """
    Add new properties from extracted_property_urls.csv to enriched_data.json
    with default values, so they can be processed by the pipeline.

    Both files are resolved relative to the current working directory.
    Existing entries that have no thumbnail get one from the CSV's
    'Thumbnail' column when available. New entries are appended in CSV row
    order; rows with a blank 'Property URL' are skipped and duplicate URLs
    are taken from their first occurrence.

    Returns:
        True when the sync completed (whether or not anything changed),
        False when the CSV file is missing or lacks a 'Property URL' column.
    """
    csv_file = Path("extracted_property_urls.csv")
    enriched_file = Path("enriched_data.json")

    if not csv_file.exists():
        print("❌ extracted_property_urls.csv not found!")
        return False

    # Load CSV with all current favorites
    print(f"📥 Loading scraped favorites from {csv_file}...")
    df = pd.read_csv(csv_file)
    if 'Property URL' not in df.columns:
        print("❌ extracted_property_urls.csv has no 'Property URL' column!")
        return False

    # Index rows by URL once: avoids re-scanning the DataFrame per property,
    # drops blank URLs, and keeps CSV order (dicts preserve insertion order).
    csv_rows = {}
    for row in df.to_dict('records'):
        url = row['Property URL']
        if pd.notna(url) and url not in csv_rows:
            csv_rows[url] = row
    print(f"   Found {len(csv_rows)} properties in CSV")

    # Load existing enriched data
    if enriched_file.exists():
        print("📚 Loading existing enriched data...")
        with open(enriched_file, 'r', encoding='utf-8') as f:
            existing_properties = json.load(f)
        existing_urls = {
            prop['url'] for prop in existing_properties if 'url' in prop
        }
        print(f"   Found {len(existing_urls)} properties in enriched_data.json")
    else:
        print("ℹ️  No existing enriched_data.json found, creating new one")
        existing_properties = []
        existing_urls = set()

    # Update thumbnails for existing properties
    thumbnails_updated = 0
    if 'Thumbnail' in df.columns:
        print("\n🖼️  Updating thumbnails for existing properties...")
        for prop in existing_properties:
            row = csv_rows.get(prop.get('url'))
            if row is None:
                continue
            thumbnail = row['Thumbnail']
            # Only fill in a thumbnail that is missing or empty; never
            # overwrite one that is already set.
            if pd.notna(thumbnail) and thumbnail and not prop.get('thumbnail'):
                prop['thumbnail'] = thumbnail
                thumbnails_updated += 1
        if thumbnails_updated > 0:
            print(f"   ✅ Updated {thumbnails_updated} thumbnail URLs")

    # Find new properties (in CSV order, so output is deterministic)
    new_urls = [url for url in csv_rows if url not in existing_urls]

    if not new_urls:
        print("✅ No new properties to add - enriched_data.json is up to date")
        # Thumbnail updates still have to be written back; only skip the
        # save when nothing at all changed.
        if thumbnails_updated == 0:
            return True
    else:
        print(f"\n🆕 Found {len(new_urls)} new properties to add:")

    # Add new properties with default values
    for url in new_urls:
        row = csv_rows[url]

        location = _value_or_default(row.get('Locatie'), 'Unknown')
        thumbnail = _value_or_default(row.get('Thumbnail'), '')
        price = _parse_price(row.get('Prijs'))

        # Create new property entry with default values
        existing_properties.append({
            'url': url,
            'title': 'Pending Analysis',
            'summary': '',
            'overall_score': 0.0,
            'gpt_score': 0.0,
            'custom_score': 0.0,
            'location': location,
            'lat': None,
            'lon': None,
            'location_source': None,
            'gps_source': None,
            'analysis': '',
            'criteria': {},
            'risk_profile': None,
            'status': 'Active',
            'price': price,
            'thumbnail': thumbnail,
            'added_at': datetime.now().isoformat()
        })

        print(f"   + {location} - €{price if price else 'N/A'}")
        print(f"     {url}")

    # Save updated enriched_data.json
    print("\n💾 Saving updated enriched_data.json...")
    with open(enriched_file, 'w', encoding='utf-8') as f:
        json.dump(existing_properties, f, ensure_ascii=False, indent=2)

    if new_urls:
        print(f"✅ Added {len(new_urls)} new properties to enriched_data.json")
    print(f"📊 Total properties: {len(existing_properties)}")

    return True

if __name__ == "__main__":
    # Script entry point: print a banner, run the sync, and report the result.
    print("=" * 70)
    print("🔄 CSV to Enriched Data Sync")
    print("=" * 70)
    print("This script adds new properties from extracted_property_urls.csv")
    print("to enriched_data.json with default values.\n")

    success = sync_csv_to_enriched()

    if success:
        print("\n✨ Sync complete!")
        print("   New properties are now ready for availability check and analysis")
    else:
        print("\n❌ Sync failed")
        # Exit with a non-zero status so a calling pipeline can detect the
        # failure instead of continuing with stale data.
        raise SystemExit(1)
