#!/usr/bin/env python3
"""
Reverse-geocode properties with lat/lon but missing department/region.

Uses Nominatim (free, 1 req/sec). Adds: country, state (region),
county (department), city, postcode to each property.

Usage:
    python3 enrich_geocode.py              # Geocode all missing
    python3 enrich_geocode.py --limit 5    # Test on 5 properties
    python3 enrich_geocode.py --dry-run    # Show what would be geocoded
"""
import argparse
import json
import time
import urllib.request
import urllib.parse
from pathlib import Path

from store import load, save, persist, upsert, is_active, NOMINATIM_UA as USER_AGENT
NOMINATIM_URL = 'https://nominatim.openstreetmap.org/reverse'


def reverse_geocode(lat, lon):
    """Look up the address for a coordinate pair via Nominatim.

    Issues one request to ``NOMINATIM_URL`` asking for a JSON response with
    address details, sending ``USER_AGENT`` as the User-Agent header.

    Returns the ``address`` mapping from the decoded response (an empty dict
    when the response has no ``address`` key), or ``None`` if the request or
    the decoding raises. Failures are printed, not propagated.
    """
    query = urllib.parse.urlencode(
        {'lat': lat, 'lon': lon, 'format': 'json', 'addressdetails': 1}
    )
    request = urllib.request.Request(
        f"{NOMINATIM_URL}?{query}",
        headers={'User-Agent': USER_AGENT},
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            payload = json.loads(response.read())
            return payload.get('address', {})
    except Exception as exc:
        # Best-effort lookup: report the problem and let the caller move on.
        print(f"  Geocode failed for {lat},{lon}: {exc}")
        return None


def needs_geocode(prop):
    """Return True when *prop* should be sent for reverse geocoding.

    A property qualifies when it has truthy ``lat`` and ``lon`` values, passes
    ``is_active`` and has no truthy ``county`` yet. The coordinate check is a
    truthiness test, so a coordinate equal to ``0`` counts as missing.
    """
    has_coords = bool(prop.get('lat')) and bool(prop.get('lon'))
    if not has_coords:
        return False
    if is_active(prop):
        return not prop.get('county')
    return False


def _fields_from_address(addr, prop):
    """Translate a Nominatim address dict into the fields to store on *prop*."""
    fields = {}
    if addr.get('country'):
        fields['country'] = addr['country']
    if addr.get('state'):
        fields['region'] = addr['state']
    if addr.get('county'):
        fields['county'] = addr['county']
    # Nominatim reports the settlement under one of several keys.
    city = addr.get('city') or addr.get('town') or addr.get('village')
    if city:
        fields['city'] = city
    if addr.get('postcode'):
        fields['postcode'] = addr['postcode']

    # Fill in the free-text location only when it was empty or generic, and
    # only when the lookup actually produced something to put there.
    current = prop.get('location') or ''
    if current.lower() in ('unknown', ''):
        parts = [fields.get('city'), fields.get('county'), fields.get('region')]
        location = ', '.join(p for p in parts if p)
        if location:
            fields['location'] = location
            fields['location_source'] = 'nominatim'
    return fields


def main():
    """Command-line entry point: reverse-geocode every property that needs it.

    Loads the store, selects the properties for which ``needs_geocode`` is
    true (optionally capped by ``--limit``), and either lists them
    (``--dry-run``) or queries Nominatim for each one and upserts the
    resulting fields. Progress is saved every 25 attempts and persisted at
    the end if at least one property was geocoded.
    """
    parser = argparse.ArgumentParser(description='Reverse-geocode properties')
    parser.add_argument('--limit', type=int, default=0, help='Max properties to geocode (0=all)')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be geocoded')
    args = parser.parse_args()

    store = load()
    candidates = [url for url, p in store.items() if needs_geocode(p)]

    # Only a positive limit truncates; a negative value would otherwise
    # slice from the end and silently drop the last N candidates.
    if args.limit > 0:
        candidates = candidates[:args.limit]

    print(f"Properties needing geocode: {len(candidates)}")

    if args.dry_run:
        for url in candidates[:10]:
            p = store[url]
            print(f"  {p.get('lat'):.4f}, {p.get('lon'):.4f}  {url[:60]}")
        if len(candidates) > 10:
            print(f"  ... and {len(candidates) - 10} more")
        return

    geocoded = 0
    for i, url in enumerate(candidates):
        # Nominatim rate limit: pause between *every* pair of requests,
        # including after a failed one, and never after the last request.
        if i:
            time.sleep(1.1)

        prop = store[url]
        addr = reverse_geocode(prop['lat'], prop['lon'])
        if addr:
            upsert(store, url, _fields_from_address(addr, prop))
            geocoded += 1

        # Progress and checkpoints are keyed on attempts so that a failed
        # lookup on the 10th/25th item does not skip them.
        if (i + 1) % 10 == 0:
            print(f"  Geocoded {i + 1}/{len(candidates)}...")
        if (i + 1) % 25 == 0 and geocoded:
            save(store)
            print("  [saved progress]")

    print(f"\nGeocoded {geocoded}/{len(candidates)} properties")

    if geocoded > 0:
        persist(store)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
