#!/usr/bin/env python3
"""
Migrate data from CSV/JSON to SQLite database
"""

import json
import pandas as pd
import sqlite3
from datetime import datetime
from pathlib import Path
from database_schema import DATABASE_PATH, create_database, get_connection

def migrate_data():
    """Migrate all data from CSV and JSON to SQLite"""

    print("="*70)
    print("MIGRATING DATA TO SQLITE")
    print("="*70)

    # Create database
    create_database()
    conn = get_connection()
    cursor = conn.cursor()

    # Load CSV (optional - has breadcrumbs)
    csv_data = {}
    try:
        df_csv = pd.read_csv('extracted_property_urls.csv')
        for _, row in df_csv.iterrows():
            csv_data[row['URL']] = {
                'breadcrumb': row.get('Breadcrumb'),
                'scraped_location': row.get('Locatie'),
                'scraped_price': row.get('Prijs')
            }
        print(f"\n✅ Loaded {len(csv_data)} properties from CSV")
    except FileNotFoundError:
        print("\n⚠️  CSV not found, using JSON only")

    # Load JSON (main data)
    try:
        with open('enriched_data.json', 'r') as f:
            properties = json.load(f)
        print(f"✅ Loaded {len(properties)} properties from JSON")
    except FileNotFoundError:
        print("❌ enriched_data.json not found!")
        return

    # Migrate properties
    print(f"\n📦 Migrating {len(properties)} properties...")

    migrated = 0
    skipped = 0

    for prop in properties:
        url = prop.get('url')
        if not url:
            skipped += 1
            continue

        # Merge CSV data if available
        csv_info = csv_data.get(url, {})
        breadcrumb = prop.get('breadcrumb') or csv_info.get('breadcrumb')

        try:
            # Check if property exists
            cursor.execute("SELECT id FROM properties WHERE url = ?", (url,))
            existing = cursor.fetchone()

            if existing:
                property_id = existing[0]
                # Update existing property
                cursor.execute("""
                    UPDATE properties SET
                        title = ?, summary = ?, description = ?, price = ?,
                        location = ?, breadcrumb = ?, status = ?,
                        lat = ?, lon = ?, geocoding_confidence = ?, geocoded_from = ?,
                        gpt_score = ?, gpt_reasoning = ?, custom_score = ?, custom_reasoning = ?,
                        final_score = ?, risk_profile = ?, last_updated = ?
                    WHERE id = ?
                """, (
                    prop.get('title'),
                    prop.get('summary'),
                    prop.get('description'),
                    prop.get('price') or csv_info.get('scraped_price'),
                    prop.get('location'),
                    breadcrumb,
                    prop.get('status', 'Active'),
                    prop.get('lat'),
                    prop.get('lon'),
                    prop.get('geocoding_confidence'),
                    prop.get('geocoded_from'),
                    prop.get('gpt_score'),
                    prop.get('gpt_reasoning'),
                    prop.get('custom_score'),
                    prop.get('custom_reasoning'),
                    prop.get('final_score'),
                    prop.get('risk_profile'),
                    datetime.now().isoformat(),
                    property_id
                ))
            else:
                # Insert new property
                cursor.execute("""
                    INSERT INTO properties (
                        url, title, summary, description, price, location, breadcrumb,
                        status, lat, lon, geocoding_confidence, geocoded_from,
                        gpt_score, gpt_reasoning, custom_score, custom_reasoning,
                        final_score, risk_profile, last_updated
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """, (
                    url,
                    prop.get('title'),
                    prop.get('summary'),
                    prop.get('description'),
                    prop.get('price') or csv_info.get('scraped_price'),
                    prop.get('location'),
                    breadcrumb,
                    prop.get('status', 'Active'),
                    prop.get('lat'),
                    prop.get('lon'),
                    prop.get('geocoding_confidence'),
                    prop.get('geocoded_from'),
                    prop.get('gpt_score'),
                    prop.get('gpt_reasoning'),
                    prop.get('custom_score'),
                    prop.get('custom_reasoning'),
                    prop.get('final_score'),
                    prop.get('risk_profile'),
                    datetime.now().isoformat()
                ))
                property_id = cursor.lastrowid

            # Migrate criteria scores if present
            criteria = prop.get('criteria_scores', {})
            if criteria:
                for criterion, score in criteria.items():
                    if isinstance(score, (int, float)):
                        cursor.execute("""
                            INSERT OR REPLACE INTO criteria_scores (
                                property_id, criterion, score
                            ) VALUES (?, ?, ?)
                        """, (property_id, criterion, float(score)))

            # Add geocoding history if has coordinates
            if prop.get('lat') and prop.get('lon'):
                cursor.execute("""
                    INSERT INTO geocoding_history (
                        property_id, lat, lon, confidence, source
                    ) VALUES (?, ?, ?, ?, ?)
                """, (
                    property_id,
                    prop.get('lat'),
                    prop.get('lon'),
                    prop.get('geocoding_confidence'),
                    prop.get('geocoded_from', 'migration')
                ))

            # Add analysis history
            if prop.get('gpt_score'):
                cursor.execute("""
                    INSERT INTO analysis_history (
                        property_id, analysis_type, score, reasoning
                    ) VALUES (?, 'gpt', ?, ?)
                """, (property_id, prop.get('gpt_score'), prop.get('gpt_reasoning')))

            if prop.get('custom_score'):
                cursor.execute("""
                    INSERT INTO analysis_history (
                        property_id, analysis_type, score, reasoning
                    ) VALUES (?, 'custom', ?, ?)
                """, (property_id, prop.get('custom_score'), prop.get('custom_reasoning')))

            migrated += 1

            if migrated % 50 == 0:
                conn.commit()
                print(f"  📦 Migrated {migrated} properties...")

        except sqlite3.IntegrityError as e:
            print(f"  ❌ IntegrityError for {url}: {e}")
            skipped += 1
        except Exception as e:
            print(f"  ❌ Error migrating {url}: {e}")
            import traceback
            traceback.print_exc()
            skipped += 1

    # Final commit
    conn.commit()

    # Update metadata
    cursor.execute("""
        INSERT OR REPLACE INTO system_metadata (key, value)
        VALUES ('last_migration', ?)
    """, (datetime.now().isoformat(),))

    cursor.execute("""
        INSERT OR REPLACE INTO system_metadata (key, value)
        VALUES ('migration_source', 'enriched_data.json + extracted_property_urls.csv')
    """)

    conn.commit()

    # Show statistics
    print("\n" + "="*70)
    print("📊 MIGRATION STATISTICS")
    print("="*70)

    cursor.execute("SELECT COUNT(*) FROM properties")
    total = cursor.fetchone()[0]

    cursor.execute("SELECT COUNT(*) FROM properties WHERE status = 'Active'")
    active = cursor.fetchone()[0]

    cursor.execute("SELECT COUNT(*) FROM properties WHERE lat IS NOT NULL AND lon IS NOT NULL")
    geocoded = cursor.fetchone()[0]

    cursor.execute("SELECT COUNT(*) FROM criteria_scores")
    criteria_count = cursor.fetchone()[0]

    cursor.execute("SELECT COUNT(*) FROM geocoding_history")
    geocoding_count = cursor.fetchone()[0]

    cursor.execute("SELECT COUNT(*) FROM analysis_history")
    analysis_count = cursor.fetchone()[0]

    print(f"✅ Successfully migrated: {migrated}")
    print(f"⚠️  Skipped: {skipped}")
    print(f"\n📊 Database contents:")
    print(f"  Properties (total): {total}")
    print(f"  Properties (active): {active}")
    print(f"  Geocoded properties: {geocoded} ({geocoded*100//total if total else 0}%)")
    print(f"  Criteria scores: {criteria_count}")
    print(f"  Geocoding history: {geocoding_count}")
    print(f"  Analysis history: {analysis_count}")

    conn.close()

    print(f"\n✅ Database created: {DATABASE_PATH}")
    print(f"📁 Size: {Path(DATABASE_PATH).stat().st_size / 1024:.1f} KB")

if __name__ == '__main__':
    migrate_data()
