#!/usr/bin/env python3
"""
Attempt to geocode properties from location hints in titles
"""

import re
from geopy.geocoders import Nominatim
import time
from db_manager import DatabaseManager

# Rough latitude/longitude rectangle used as the "is this in Europe?" test.
# It is a plain axis-aligned box, not a real border: anything between
# 35°N-72°N and 10°W-40°E counts, and places west of 10°W (e.g. Iceland)
# fall outside it.
EUROPE_BBOX = dict(
    lat_min=35.0,
    lat_max=72.0,
    lon_min=-10.0,
    lon_max=40.0,
)

def is_in_europe(lat, lon):
    """Return whether the point (lat, lon) lies inside EUROPE_BBOX.

    Both bounds are inclusive. Latitude is checked first; longitude is only
    compared when the latitude is already within range.
    """
    box = EUROPE_BBOX
    if not (box['lat_min'] <= lat <= box['lat_max']):
        return False
    return box['lon_min'] <= lon <= box['lon_max']

# A "place-name word": an uppercase letter followed by one or more letters,
# hyphens or apostrophes. Besides ASCII, the Latin-1 accented letters
# (U+00C0-U+00FF without the multiplication and division signs) are accepted so
# that names such as "Málaga" or "Coruña" are not cut off at the first accent.
_NAME_WORD = (
    r"[A-Z\u00C0-\u00D6\u00D8-\u00DE]"
    r"[A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\-']+"
)

# Exactly-five-digit token followed by a name, e.g. "35330 VAL-D'ANAST".
# The leading \b stops the tail of a longer number (a price such as "250000")
# from being read as a postal code.
_POSTAL_CODE_RE = re.compile(r"\b\d{5}\s+(" + _NAME_WORD + r")")

# The standalone word "in" followed by one or two name words, e.g.
# "te koop in Den Haag". The leading \b prevents matching the "in" that ends
# another word ("Cabin Retreat", "tuin Huis").
_IN_LOCATION_RE = re.compile(
    r"\bin\s+(" + _NAME_WORD + r"(?:\s+" + _NAME_WORD + r")?)"
)

# A name word at the very start of the title.
_LEADING_WORD_RE = re.compile(r"^(" + _NAME_WORD + r")")


def extract_location_from_title(title):
    """Extract a potential location string from a listing title.

    Three heuristics are tried in order and the first hit wins:

    1. A five-digit postal code followed by a capitalised name; the name is
       returned with ", France" appended.
    2. The word "in" followed by one or two capitalised names
       (this also covers "te koop in <Location>").
    3. A capitalised word of more than three characters at the start of the
       title.

    Args:
        title: The listing title, or None / empty.

    Returns:
        The location text to geocode, or None when no hint is found.
    """
    if not title:
        return None

    # Pattern 1: postal code + location (e.g., "35330 VAL-D'ANAST")
    match = _POSTAL_CODE_RE.search(title)
    if match:
        # Most postal codes of this shape are French
        return f"{match.group(1)}, France"

    # Pattern 2: "in Location" or "te koop in Location"
    match = _IN_LOCATION_RE.search(title)
    if match:
        return match.group(1)

    # Pattern 3: location at start of title
    match = _LEADING_WORD_RE.search(title)
    if match and len(match.group(1)) > 3:  # Avoid short words like "HUT"
        return match.group(1)

    return None

def geocode_location(geolocator, location_text):
    """Look up location_text and return its coordinates if they are in Europe.

    Waits one second before every lookup (presumably to stay within the
    geocoding service's rate limit), then asks the geolocator for a single
    best match with a 10-second timeout.

    Args:
        geolocator: Object exposing geocode(query, exactly_one=..., timeout=...).
        location_text: Free-text location to look up.

    Returns:
        (lat, lon) when a match is found inside the Europe bounding box,
        otherwise (None, None). Any exception raised during the lookup is
        printed and also results in (None, None).
    """
    coords = (None, None)
    try:
        time.sleep(1)
        hit = geolocator.geocode(location_text, exactly_one=True, timeout=10)

        if hit:
            latitude = hit.latitude
            longitude = hit.longitude
            if is_in_europe(latitude, longitude):
                coords = (latitude, longitude)
    except Exception as exc:
        print(f"   ⚠️ Geocoding error: {exc}")

    return coords

def main():
    """Geocode every property that still lacks coordinates, using its title.

    For each property returned by db.get_missing_geocoded_properties():
    a manual per-URL override is used when one exists, otherwise a location
    hint is extracted from the title. The hint is geocoded and, on success,
    stored through db.update_geocoding() with confidence 'low'. If at least
    one property was saved, db.export_to_json() is called. Progress and a
    final summary from db.get_geocoding_stats() are printed to stdout.
    """
    print("="*70)
    print("GEOCODE FROM TITLE HINTS")
    print("="*70)

    db = DatabaseManager()
    geolocator = Nominatim(user_agent="farmmatch_title_geocoder")

    # Manual location overrides for known properties
    manual_locations = {
        'https://www.properstar.nl/listing/104321403': 'Val-d\'Anast, Bretagne, France',
        'https://www.properstar.nl/listing/99180334': 'Sobrado, A Coruña, Spain',
        'https://www.properstar.nl/listing/77862600': 'Iruz, Lugo, Spain'
    }

    missing = db.get_missing_geocoded_properties()
    print(f"\n📍 Properties to geocode: {len(missing)}\n")

    geocoded = 0

    for i, prop in enumerate(missing, 1):
        url = prop['url']
        # The title key may be absent or hold None; normalise to '' so the
        # slice below and the regex search cannot raise.
        title = prop.get('title') or ''

        # Truncate for display only; extraction below uses the full title so
        # a location hint beyond the 80th character is not lost.
        print(f"[{i}/{len(missing)}] {title[:80]}...")

        # Check manual override first
        if url in manual_locations:
            location_text = manual_locations[url]
            print(f"   📍 Manual location: {location_text}")
        else:
            # Extract from title
            location_text = extract_location_from_title(title)
            if not location_text:
                print(f"   ❌ No location hint found")
                continue
            print(f"   📍 Extracted: {location_text}")

        # Geocode
        lat, lon = geocode_location(geolocator, location_text)

        # Compare against None explicitly: 0.0 is a valid coordinate
        # (longitude 0 lies inside the Europe bounding box) but is falsy.
        if lat is not None and lon is not None:
            print(f"   ✅ Geocoded → ({lat:.6f}, {lon:.6f})")
            success = db.update_geocoding(
                url=url,
                lat=lat,
                lon=lon,
                confidence='low',  # Lower confidence for title-based
                source=f'title:{location_text}'
            )
            if success:
                geocoded += 1
                print(f"   💾 Saved")
        else:
            print(f"   ❌ Could not geocode")

    # Export only when something actually changed
    if geocoded > 0:
        print(f"\n📤 Exporting to JSON...")
        db.export_to_json()

    # Stats
    geo_stats = db.get_geocoding_stats()

    print("\n" + "="*70)
    print("📊 RESULTS")
    print("="*70)
    print(f"✅ Successfully geocoded: {geocoded}")
    print(f"📍 New coverage: {geo_stats['geocoded']}/{geo_stats['total']} ({geo_stats['coverage_percent']}%)")

# Run the geocoding pass only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
