#!/usr/bin/env python3
"""
Geocode properties that have postal_code/city but no lat/lon.

Uses Nominatim (free, 1 req/sec rate limit).

Usage:
    python3 geocode_properties.py                # All missing coordinates
    python3 geocode_properties.py --limit 3      # Test on 3 properties
    python3 geocode_properties.py --dry-run      # Show what would be geocoded
    python3 geocode_properties.py --force         # Re-geocode all
"""
import argparse
import time
import requests

from store import load, persist, upsert, is_active, short_url, NOMINATIM_UA

# Nominatim (OpenStreetMap) search endpoint that geocode() sends its queries to.
NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"

# ISO country code → Nominatim countrycodes + display name.
# resolve_country() looks up a property's 'country' field in this table and
# returns the matching (countrycodes, display name) tuple.
# Besides the two-letter codes, the full name 'France' is accepted as a key —
# presumably because some records store the name instead of the code.
COUNTRY_MAP = {
    'FR': ('fr', 'France'),
    'IT': ('it', 'Italy'),
    'ES': ('es', 'Spain'),
    'PT': ('pt', 'Portugal'),
    'GR': ('gr', 'Greece'),
    'France': ('fr', 'France'),
}



# Coordinate bounds per country for validation (from validate_coordinates.py).
# Keyed by the lowercase country code used for Nominatim's countrycodes
# parameter. Each entry holds an inclusive (min, max) range for latitude and
# for longitude, as checked by validate_coords().
# These are rectangular boxes: a point inside the box is plausible for the
# country, but not guaranteed to lie within its actual borders.
COUNTRY_BOUNDS = {
    'fr': {'lat': (41.0, 51.5), 'lon': (-5.5, 10.0)},
    'it': {'lat': (35.0, 47.5), 'lon': (6.0, 19.0)},
    'es': {'lat': (35.0, 44.0), 'lon': (-10.0, 5.0)},
    'pt': {'lat': (36.5, 42.5), 'lon': (-10.0, -6.0)},
    'gr': {'lat': (34.5, 42.0), 'lon': (19.0, 29.0)},
}


def validate_coords(lat, lon, countrycode):
    """Tell whether a coordinate pair lies inside a country's bounding box.

    Args:
        lat: Latitude to test.
        lon: Longitude to test.
        countrycode: Key into COUNTRY_BOUNDS (e.g. 'fr').

    Returns:
        True when both values fall within the inclusive ranges stored for
        ``countrycode``, False when either falls outside, and None when
        COUNTRY_BOUNDS has no entry for ``countrycode``, so no check is
        possible.
    """
    box = COUNTRY_BOUNDS.get(countrycode)
    if not box:
        # Unknown country: nothing to compare against.
        return None

    lat_range = box['lat']
    lon_range = box['lon']
    within_lat = lat_range[0] <= lat <= lat_range[1]
    within_lon = lon_range[0] <= lon <= lon_range[1]
    return within_lat and within_lon


def resolve_country(prop):
    """Get (countrycode, country_name) from property data.

    Resolution order:
      1. The property's 'country' field, looked up in COUNTRY_MAP.
      2. Known listing-site markers in the property's 'url'.
      3. For Properstar URLs, country names (English or Dutch) found in the
         property's 'breadcrumb'.

    Args:
        prop: Mapping with optional 'country', 'url' and 'breadcrumb' entries.
            Missing keys and None values are both treated as "not provided".

    Returns:
        A (countrycode, country_name) tuple such as ('fr', 'France'), or
        (None, None) when the country cannot be determined. There is
        deliberately no default country, so callers can tell "unknown" apart.
    """
    iso = prop.get('country')
    # The truthiness/str guard keeps None and unhashable values away from the
    # dict lookup, which would otherwise raise TypeError for e.g. a list.
    if iso and isinstance(iso, str) and iso in COUNTRY_MAP:
        return COUNTRY_MAP[iso]

    # Detect from URL. ``or ''`` also covers records where the key exists but
    # holds None, which ``prop.get('url', '')`` would pass through unchanged.
    url = prop.get('url') or ''
    site_markers = (
        (('frenchestateagents', 'green-acres.fr'), ('fr', 'France')),
        (('idealista.it', 'immobiliare.it'), ('it', 'Italy')),
        (('idealista.com/en',), ('es', 'Spain')),
        (('idealista.pt',), ('pt', 'Portugal')),
    )
    for markers, result in site_markers:
        if any(marker in url for marker in markers):
            return result

    if 'properstar' in url:
        # Properstar: try breadcrumb for country
        bc = (prop.get('breadcrumb') or '').lower()
        breadcrumb_markers = (
            (('france', 'frankrijk'), ('fr', 'France')),
            (('ital',), ('it', 'Italy')),
            (('spain', 'spanje'), ('es', 'Spain')),
            (('portugal',), ('pt', 'Portugal')),
            # Greece is supported by COUNTRY_MAP/COUNTRY_BOUNDS, so detect it
            # here as well instead of reporting it as unknown.
            (('greece', 'griekenland'), ('gr', 'Greece')),
        )
        for markers, result in breadcrumb_markers:
            if any(marker in bc for marker in markers):
                return result

    # Don't default to France — return None so caller knows it's unknown
    return None, None


def needs_geocoding(prop, force=False):
    """Decide whether a property should be sent to the geocoder.

    A property qualifies when it is active (per ``is_active``), carries at
    least one of 'postal_code', 'city' or 'location', and does not already
    have both 'lat' and 'lon' set.

    Args:
        prop: Property record (mapping).
        force: When true, existing coordinates do not disqualify the property.

    Returns:
        True if the property should be geocoded, otherwise False.
    """
    if not is_active(prop):
        return False

    if not force:
        has_coords = prop.get('lat') and prop.get('lon')
        if has_coords:
            return False

    # Without any location hint there is nothing to query for.
    location_keys = ('postal_code', 'city', 'location')
    return any(prop.get(key) for key in location_keys)


# Pause between two consecutive Nominatim requests issued by geocode(), kept
# slightly above the 1 request/second limit of the public service.
_NOMINATIM_MIN_INTERVAL = 1.1


def _nominatim_first_hit(params):
    """Run one Nominatim search; return (lat, lon) of the first hit, or None.

    Lookups are best effort: network errors, non-JSON bodies and unexpected
    payload shapes all count as "no result" instead of aborting the run.
    """
    try:
        resp = requests.get(
            NOMINATIM_URL,
            params=params,
            headers={'User-Agent': NOMINATIM_UA},
            timeout=10,
        )
        data = resp.json()
        if data:
            return float(data[0]['lat']), float(data[0]['lon'])
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # RequestException: transport/HTTP failure. ValueError: body is not
        # JSON or lat/lon is not numeric. KeyError/IndexError/TypeError: the
        # payload is not the expected list of result objects.
        pass
    return None


def geocode(postal_code=None, city=None, department=None, country='France', countrycodes='fr'):
    """Geocode using Nominatim. Returns (lat, lon) or (None, None).

    With a postal code, a structured query (postalcode / city / country) is
    tried first; if that yields nothing, a free-form query built from all
    available parts is tried as a fallback. With only a city, a single
    free-form query "city[, department], country" is sent.

    Args:
        postal_code: Postal code of the place, if known.
        city: City or place name, if known.
        department: Optional sub-national area name, used in free-form queries.
        country: Country display name included in the query.
        countrycodes: Value for Nominatim's ``countrycodes`` filter on the
            first query. The fallback query does not send this filter, so its
            result is not restricted to the country; callers that care should
            validate the returned coordinates.

    Returns:
        (lat, lon) as floats for the first hit, or (None, None) when neither
        postal code nor city is given or no query produced a usable result.
    """
    params = {
        'format': 'json',
        'limit': 1,
        'countrycodes': countrycodes,
    }

    # Try structured query first (most precise)
    if postal_code:
        params['postalcode'] = postal_code
        if city:
            params['city'] = city
        params['country'] = country
    elif city:
        q = city
        if department:
            q += f", {department}"
        q += f", {country}"
        params['q'] = q
    else:
        return None, None

    hit = _nominatim_first_hit(params)
    if hit is not None:
        return hit

    # A failed free-form query has no further fallback.
    if 'q' in params:
        return None, None

    # Fallback: free-form query. Wait first so the two requests made by this
    # call do not exceed the service's rate limit.
    time.sleep(_NOMINATIM_MIN_INTERVAL)
    parts = [p for p in (city, postal_code, department, country) if p]
    hit = _nominatim_first_hit({
        'format': 'json',
        'limit': 1,
        'q': ', '.join(parts),
    })
    if hit is not None:
        return hit

    return None, None


def main():
    """Command-line entry point: geocode stored properties lacking coordinates.

    Loads the store, selects candidates via ``needs_geocoding`` (optionally
    capped by ``--limit``), and for each one resolves its country, queries
    Nominatim and writes accepted coordinates back with ``upsert``. With
    ``--dry-run`` the candidates are only listed. The store is persisted once
    at the end, and only if at least one property was geocoded.
    """
    # Function-scope import, as in the original loop body; done once here
    # instead of on every iteration.
    import re

    parser = argparse.ArgumentParser(description='Geocode properties via Nominatim')
    parser.add_argument('--limit', type=int, default=0, help='Max properties to geocode (0=all)')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be geocoded')
    parser.add_argument('--force', action='store_true', help='Re-geocode all')
    args = parser.parse_args()

    store = load()
    candidates = [url for url, p in store.items() if needs_geocoding(p, args.force)]

    # Only a positive limit caps the list; a negative value would otherwise
    # slice candidates off the end.
    if args.limit > 0:
        candidates = candidates[:args.limit]

    total = len(candidates)
    print(f"Properties to geocode: {total}")

    if args.dry_run:
        for url in candidates:
            p = store[url]
            # ``or '?'`` also covers keys that are present but hold None.
            pc = p.get('postal_code') or '?'
            city = p.get('city') or '?'
            print(f"  {short_url(url)}  {pc} {city}")
        return

    if not candidates:
        print("Nothing to do.")
        return

    # Matches a trailing parenthesised suffix, e.g. " (Lot and Garonne)".
    trailing_parens = re.compile(r'\s*\([^)]*\)\s*$')

    geocoded = 0
    failed = 0

    for i, url in enumerate(candidates, start=1):
        p = store[url]
        pc = p.get('postal_code')
        city = p.get('city') or p.get('location') or p.get('title')
        dept = p.get('department')
        cc, country_name = resolve_country(p)
        if cc is None:
            failed += 1
            # Identify the property so the skip can be followed up.
            print(f"  [{i}/{total}] {short_url(url)} SKIP (unknown country)")
            continue

        # Clean city: strip region in parentheses, e.g. "Lacapelle-Biron (Lot and Garonne)" → "Lacapelle-Biron"
        if city:
            city_clean = trailing_parens.sub('', city).strip()
            if city_clean:
                city = city_clean

        loc = (city or '')[:30]

        print(f"  [{i}/{total}] {loc} ({pc or '?'}, {cc})...", end=' ', flush=True)

        lat, lon = geocode(postal_code=pc, city=city, department=dept,
                           country=country_name, countrycodes=cc)

        # Compare against None: 0.0 is a valid coordinate but is falsy.
        if lat is not None and lon is not None:
            # Validate coordinates are in expected country; None means the
            # country has no bounds to check against, so the result is kept.
            valid = validate_coords(lat, lon, cc)
            if valid is False:
                failed += 1
                print(f"REJECTED ({lat:.4f}, {lon:.4f}) outside {cc.upper()} bounds")
            else:
                upsert(store, url, {'lat': lat, 'lon': lon, 'coord_source': 'nominatim'})
                geocoded += 1
                print(f"OK ({lat:.4f}, {lon:.4f})")
        else:
            failed += 1
            print("FAIL (no result)")

        time.sleep(1.1)  # Nominatim rate limit

    print(f"\nGeocoded {geocoded}/{total} ({failed} failed)")

    if geocoded > 0:
        persist(store)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
