#!/usr/bin/env python3
"""
Geocode properties that are missing coordinates and save the results to the database.
"""

import asyncio
from playwright.async_api import async_playwright
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
import time
from db_manager import DatabaseManager

# Europe bounding box
# Rough rectangular bounding box (latitude/longitude, decimal degrees) used as
# a sanity filter for geocoding results.
EUROPE_BBOX = {
    'lat_min': 35.0,
    'lat_max': 72.0,
    'lon_min': -10.0,
    'lon_max': 40.0
}

def is_in_europe(lat, lon):
    """Return True when (lat, lon) lies inside ``EUROPE_BBOX`` (bounds inclusive).

    Args:
        lat: Latitude, or ``None`` when unknown.
        lon: Longitude, or ``None`` when unknown.

    Returns:
        ``False`` if either coordinate is ``None`` or the point falls outside
        the box, ``True`` otherwise.
    """
    # Compare against None explicitly: 0.0 is a legitimate coordinate (the
    # prime meridian runs through the box), so a truthiness test would wrongly
    # reject it.
    if lat is None or lon is None:
        return False
    return (EUROPE_BBOX['lat_min'] <= lat <= EUROPE_BBOX['lat_max'] and
            EUROPE_BBOX['lon_min'] <= lon <= EUROPE_BBOX['lon_max'])

def parse_breadcrumb(breadcrumb):
    """Split a breadcrumb trail into its location components.

    The value is converted to ``str`` and split on ``>``; every piece is
    stripped of surrounding whitespace. Empty pieces and pieces that exactly
    (case-sensitively) match a known property-type label are discarded.

    Returns:
        ``None`` when ``breadcrumb`` is falsy, otherwise a list of the
        remaining pieces in their original order (the list may be empty).
    """
    if not breadcrumb:
        return None

    # Listing-type labels that describe the property rather than a place.
    ignored_labels = {
        'Huis', 'Boerderij', 'Villa', 'House', 'Farm', 'Casa', 'Maison',
        'Finca', 'Terreno', 'Land', 'Property', 'Cottage', 'Estate',
        'Grond', 'Perceel', 'Vastgoed', 'Plot', 'Terrain',
    }

    locations = []
    for piece in str(breadcrumb).split('>'):
        cleaned = piece.strip()
        if cleaned and cleaned not in ignored_labels:
            locations.append(cleaned)

    return locations

def geocode_with_fallback(geolocator, breadcrumb, retries=3):
    """Geocode a breadcrumb, falling back to progressively shorter queries.

    The breadcrumb is parsed with ``parse_breadcrumb`` and up to three query
    strings are tried in order: the last three parts, the last two parts,
    then the last part alone. The first result that lies inside Europe (per
    ``is_in_europe``) wins.

    Args:
        geolocator: Object exposing ``geocode(query, exactly_one=..., timeout=...)``
            whose result has ``latitude`` and ``longitude`` attributes.
        breadcrumb: Breadcrumb text, e.g. ``"Country > Region > Town"``.
        retries: Maximum number of requests per query string when the
            geocoder times out or reports a service error.

    Returns:
        ``(lat, lon, query)`` for the first accepted result, or
        ``(None, None, None)`` when nothing could be geocoded.
    """
    parts = parse_breadcrumb(breadcrumb)
    if not parts:
        return None, None, None

    # Build the queries from the longest tail of the breadcrumb to the shortest.
    attempts = [', '.join(parts[-size:]) for size in (3, 2, 1) if len(parts) >= size]

    for attempt_text in attempts:
        for retry in range(retries):
            try:
                time.sleep(1)  # Rate limiting
                location = geolocator.geocode(attempt_text, exactly_one=True, timeout=10)
            except (GeocoderTimedOut, GeocoderServiceError) as e:
                if retry < retries - 1:
                    print("   ⚠️ Timeout, retrying...")
                    time.sleep(2)
                    continue
                print(f"   ❌ Geocoding error: {e}")
                break
            except Exception as e:
                print(f"   ❌ Error: {e}")
                break

            if not location:
                # The geocoder answered but found nothing. Re-sending the same
                # query cannot change that, so move on to the next fallback
                # instead of burning the remaining retries.
                break

            lat, lon = location.latitude, location.longitude
            if is_in_europe(lat, lon):
                print(f"   ✅ '{attempt_text}' → ({lat:.6f}, {lon:.6f})")
                return lat, lon, attempt_text

            print(f"   ❌ '{attempt_text}' → outside Europe")
            break

    return None, None, None

async def extract_breadcrumb(page, url):
    """Load a property page and return its breadcrumb trail as text.

    Navigates ``page`` to ``url``, waits a fixed 2000 ms, then looks for a
    breadcrumb container using three selectors in turn. The text of each link
    inside the first container found is collected, skipping blank entries and
    the generic labels "home", "properstar" and "properties" (compared
    case-insensitively).

    Returns:
        The collected link texts joined with ``" > "``; failing that, the
        ``content`` of the ``og:locality`` meta tag; otherwise ``None``.
        Any exception is reported on stdout and also yields ``None``.
    """
    try:
        await page.goto(url, wait_until="domcontentloaded", timeout=30000)
        await page.wait_for_timeout(2000)

        # Probe the candidate selectors in priority order; stop at the first hit.
        container = None
        for selector in (
            ".breadcrumb-container",
            "nav[aria-label='breadcrumb']",
            ".breadcrumb",
        ):
            container = await page.query_selector(selector)
            if container:
                break

        if container:
            generic_labels = ['home', 'properstar', 'properties']
            labels = []
            for anchor in await container.query_selector_all("a"):
                label = (await anchor.inner_text()).strip()
                if label and label.lower() not in generic_labels:
                    labels.append(label)

            if labels:
                return " > ".join(labels)

        # Fallback: meta tags
        meta_tag = await page.query_selector("meta[property='og:locality']")
        if not meta_tag:
            return None

        locality = await meta_tag.get_attribute("content")
        return locality if locality else None

    except Exception as e:
        print(f"  ⚠️ Error extracting: {str(e)[:100]}")
        return None

async def main():
    """Geocode every property that lacks coordinates and report the outcome.

    Workflow:
        1. Fetch the properties without coordinates from ``DatabaseManager``.
        2. For each one, use the stored breadcrumb or scrape it from the
           property page with a headless Chromium browser, saving a freshly
           scraped breadcrumb back to the database.
        3. Geocode the breadcrumb with Nominatim and store the coordinates.
        4. Export the database to JSON and print coverage statistics.

    Returns early, without launching a browser, when nothing is missing.
    """
    print("="*70)
    print("GEOCODE MISSING PROPERTIES (DATABASE-BACKED)")
    print("="*70)

    db = DatabaseManager()

    # Get missing properties
    missing = db.get_missing_geocoded_properties()
    print(f"\n📍 Properties missing coordinates: {len(missing)}\n")

    if not missing:
        print("✅ All properties have coordinates!")
        return

    # Initialize geocoder
    geolocator = Nominatim(user_agent="farmmatch_db_geocoder")

    geocoded_count = 0
    failed_count = 0

    # Initialize browser
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context()
            page = await context.new_page()

            for i, prop in enumerate(missing, 1):
                url = prop['url']
                # The key may be present with a None value, in which case the
                # .get() default is not applied; fall back explicitly so the
                # slice cannot fail.
                title = (prop.get('title') or 'No title')[:60]
                breadcrumb = prop.get('breadcrumb')

                print(f"[{i}/{len(missing)}] {title}...")
                print(f"   URL: {url}")

                # Extract breadcrumb if not present
                if not breadcrumb:
                    print("   📋 Extracting breadcrumb...")
                    breadcrumb = await extract_breadcrumb(page, url)
                    if breadcrumb:
                        print(f"   ✅ {breadcrumb}")
                        # Update breadcrumb in database
                        db.update_property(url, breadcrumb=breadcrumb)
                    else:
                        print("   ❌ No breadcrumb found")
                        failed_count += 1
                        continue
                else:
                    print(f"   📋 {breadcrumb}")

                # Geocode. NOTE: this call is synchronous and sleeps between
                # requests, so the event loop is blocked while it runs.
                lat, lon, geocoded_from = geocode_with_fallback(geolocator, breadcrumb)

                # Failure is signalled with None; 0.0 is a valid coordinate
                # and must not be mistaken for "not geocoded".
                if lat is not None and lon is not None:
                    # Update in database
                    success = db.update_geocoding(
                        url=url,
                        lat=lat,
                        lon=lon,
                        confidence='medium',
                        source=f'breadcrumb:{geocoded_from}'
                    )
                    if success:
                        geocoded_count += 1
                        print("   💾 Saved to database")
                    else:
                        print("   ⚠️ Failed to save")
                        failed_count += 1
                else:
                    print("   ❌ Could not geocode")
                    failed_count += 1

                # Print a running tally every five properties
                if i % 5 == 0:
                    print(f"\n   📊 Progress: {geocoded_count} geocoded, {failed_count} failed\n")

                # Pause between properties
                await asyncio.sleep(1)
        finally:
            # Close the browser even when processing a property raised.
            await browser.close()

    # Export to JSON
    print("\n📤 Exporting to JSON...")
    db.export_to_json()

    # Final stats
    geo_stats = db.get_geocoding_stats()

    print("\n" + "="*70)
    print("📊 RESULTS")
    print("="*70)
    print(f"✅ Successfully geocoded: {geocoded_count}")
    print(f"❌ Failed: {failed_count}")
    print(f"\n📍 New coverage: {geo_stats['geocoded']}/{geo_stats['total']} ({geo_stats['coverage_percent']}%)")

# Script entry point: run the async workflow only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    asyncio.run(main())
