#!/usr/bin/env python3
"""
Look up nearby amenities for properties using Overpass API (OpenStreetMap).

Stores results in the property store so they can be used as gates/filters.

Usage:
    python3 lookup_amenities.py                # All shortlisted with coords
    python3 lookup_amenities.py --all          # All active with coords
    python3 lookup_amenities.py --limit 5      # Test on 5 properties
    python3 lookup_amenities.py --force        # Re-lookup already enriched
    python3 lookup_amenities.py --dry-run      # Show what would be looked up
"""
import argparse
import json
import math
import time
import requests

from store import load, persist, upsert, is_active, short_url

import os
# Overpass interpreter endpoint. Defaults to the public overpass-api.de
# instance; set the OVERPASS_URL environment variable to point at a mirror.
OVERPASS_URL = os.environ.get("OVERPASS_URL", "https://overpass-api.de/api/interpreter")

# Rough bounding boxes used to sanity-check stored coordinates before querying.
# Keyed by upper-case country code; each entry maps 'lat' and 'lon' to a
# (min, max) pair in decimal degrees. coords_in_bounds() treats both ends as
# inclusive, and country codes missing from this table are not validated.
# NOTE(review): the boxes look like mainland-only extents (e.g. ES starts at
# lat 35.0 and PT at lon -10.0, which would exclude the Canary Islands,
# Madeira and the Azores) — confirm that is intended.
COUNTRY_BOUNDS = {
    'FR': {'lat': (41.0, 51.5), 'lon': (-5.5, 10.0)},
    'IT': {'lat': (35.0, 47.5), 'lon': (6.0, 19.0)},
    'ES': {'lat': (35.0, 44.0), 'lon': (-10.0, 5.0)},
    'PT': {'lat': (36.5, 42.5), 'lon': (-10.0, -6.0)},
    'GR': {'lat': (34.5, 42.0), 'lon': (19.0, 29.0)},
}


def coords_in_bounds(lat, lon, country_code):
    """Return True when (lat, lon) lies inside the box listed for country_code.

    Both edges of the box are inclusive. A country code with no entry in
    COUNTRY_BOUNDS cannot be checked and is therefore accepted.
    """
    box = COUNTRY_BOUNDS.get(country_code)
    if box:
        (south, north), (west, east) = box['lat'], box['lon']
        return south <= lat <= north and west <= lon <= east
    # No box on record for this country: nothing to validate against.
    return True

# Amenity types to search. The dict key is the name under which the result is
# stored for a property; 'tag' is the OSM tag filter and 'radius' is the search
# radius in meters around the property.
# Tag filter syntax: key=value for an exact match, key~"regex" for a regex match.
# NOTE(review): the regex entries carry their own double quotes — check that
# query_overpass() does not wrap them in a second pair when building the query.
# NOTE(review): in OSM, rivers are usually tagged waterway=river rather than
# natural=river — confirm the 'water' pattern matches what is intended.
AMENITIES = {
    'bakery':        {'tag': 'shop=bakery', 'radius': 20000},
    'hospital':      {'tag': 'amenity=hospital', 'radius': 50000},
    'train_station': {'tag': 'railway=station', 'radius': 50000},
    'school':        {'tag': 'amenity=school', 'radius': 20000},
    'supermarket':   {'tag': 'shop=supermarket', 'radius': 20000},
    'town':          {'tag': 'place~"town|city"', 'radius': 30000},
    'water':         {'tag': 'natural~"water|lake|river"', 'radius': 5000},
    'coastline':     {'tag': 'natural=coastline', 'radius': 30000},
}

# Airports with NL routes (local calculation, no API needed).
# Each entry is a tuple: (display name, IATA code, latitude, longitude,
# free-text note on the airlines / route from the Netherlands).
# find_nearest_airport() picks the entry closest to a property by
# great-circle distance.
AIRPORTS_NL = [
    ("Genoa", "GOA", 44.4133, 8.8375, "Transavia from AMS"),
    ("Pisa", "PSA", 43.6839, 10.3927, "Transavia/easyJet from AMS"),
    ("Bologna", "BLQ", 44.5354, 11.2887, "Transavia/KLM from AMS"),
    ("Ancona", "AOI", 43.6163, 13.3623, "Ryanair from EIN (seasonal)"),
    ("Rome FCO", "FCO", 41.8003, 12.2389, "KLM/Transavia from AMS"),
    ("Naples", "NAP", 40.886, 14.2908, "Transavia from AMS"),
    ("Valencia", "VLC", 39.4893, -0.4816, "Transavia from AMS"),
    ("Barcelona", "BCN", 41.2974, 2.0833, "KLM/Transavia from AMS"),
    ("Porto", "OPO", 41.2481, -8.6814, "KLM/Transavia from AMS"),
    ("Lisbon", "LIS", 38.7756, -9.1354, "KLM/Transavia from AMS"),
    ("Bordeaux", "BOD", 44.8283, -0.7156, "Transavia from AMS"),
    ("Toulouse", "TLS", 43.6291, 1.3638, "KLM from AMS"),
    ("Limoges", "LIG", 45.8628, 1.1794, "Ryanair from EIN (seasonal)"),
    ("Tours", "TUF", 47.4322, 0.7276, "Ryanair from EIN (seasonal)"),
    ("Paris CDG", "CDG", 49.0097, 2.5479, "KLM/Transavia from AMS"),
    ("Nantes", "NTE", 47.1532, -1.6107, "Transavia from AMS"),
    ("Rennes", "RNS", 48.0695, -1.7348, "via Paris"),
    ("Marseille", "MRS", 43.4393, 5.2214, "KLM/Transavia from AMS"),
    ("Brest", "BES", 48.4479, -4.4186, "via Paris"),
    ("Dinard", "DNR", 48.5877, -2.0797, "Ryanair from London"),
]


def haversine_km(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Inputs are latitude/longitude in decimal degrees. Uses the haversine
    formula on a sphere of radius 6371 km.
    """
    earth_radius_km = 6371
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = (math.sin(dlat / 2) ** 2
         + math.cos(phi1) * math.cos(phi2) * math.sin(dlon / 2) ** 2)
    # Floating-point rounding can push `a` a hair above 1 for (near-)antipodal
    # points, which would make asin() raise "math domain error". Clamp it.
    a = min(1.0, a)
    return earth_radius_km * 2 * math.asin(math.sqrt(a))


def find_nearest_airport(lat, lon):
    """Return the AIRPORTS_NL entry closest to (lat, lon).

    The result is a dict with 'name', 'iata', 'km' (rounded to whole
    kilometres) and 'airlines'. Returns None when no airport lies closer
    than 9999 km.
    """
    nearest = None
    nearest_km = 9999
    for airport in AIRPORTS_NL:
        name, iata, airport_lat, airport_lon, airlines = airport
        km = haversine_km(lat, lon, airport_lat, airport_lon)
        if not km < nearest_km:
            continue  # not an improvement on what we already have
        nearest_km = km
        nearest = {
            'name': name,
            'iata': iata,
            'km': round(km),
            'airlines': airlines,
        }
    return nearest


def _split_tag(tag):
    """Split a tag filter such as 'shop=bakery' or 'place~"town|city"'.

    Returns (key, operator, value), where operator is '=' (exact match) or
    '~' (regex match) — whichever appears first in the string. One pair of
    surrounding double quotes is removed from the value, because the query
    template adds its own. Raises ValueError when the tag has no operator.
    """
    positions = [pos for pos in (tag.find('='), tag.find('~')) if pos >= 0]
    if not positions:
        raise ValueError(f'tag filter needs "=" or "~": {tag!r}')
    cut = min(positions)
    key, op, val = tag[:cut], tag[cut], tag[cut + 1:]
    if len(val) >= 2 and val[0] == '"' and val[-1] == '"':
        val = val[1:-1]
    return key, op, val


def _nearest_element(lat, lon, elements):
    """Pick the element closest to (lat, lon) from an Overpass element list.

    Nodes carry 'lat'/'lon' directly; ways only have them under 'center'
    (requested via `out center`). Elements without coordinates are ignored.
    Returns {'name': ..., 'km': ...} or None when no element is usable.
    """
    best = None
    best_dist = float('inf')
    for el in elements:
        center = el.get('center') or {}
        elat = el.get('lat')
        if elat is None:
            elat = center.get('lat')
        elon = el.get('lon')
        if elon is None:
            elon = center.get('lon')
        # Compare against None rather than truthiness: 0.0 is a valid
        # coordinate (the prime meridian crosses France and Spain).
        if elat is None or elon is None:
            continue
        d = haversine_km(lat, lon, elat, elon)
        if d < best_dist:
            best_dist = d
            best = {'name': el.get('tags', {}).get('name', ''), 'km': round(d, 1)}
    return best


def query_overpass(lat, lon, tag, radius):
    """Query Overpass for the nearest element matching `tag` around a point.

    Args:
        lat, lon: search centre in decimal degrees.
        tag: tag filter, either 'key=value' or 'key~regex'; the value may
            optionally be wrapped in double quotes.
        radius: search radius in meters.

    Returns (result_or_None, error_str_or_None). Caller can distinguish
    'genuinely no nearby amenity' (None, None) from 'query failed'
    (None, 'reason'). On success the result is {'name': str, 'km': float}.

    Raises ValueError if `tag` contains neither '=' nor '~'. Network and
    parsing problems are never raised; they are reported via the error string.
    """
    key, op, val = _split_tag(tag)

    # NOTE(review): `out center 5` caps the response at five elements and the
    # query requests no distance ordering, so the value returned is the nearest
    # of those five, not necessarily the nearest overall. Confirm whether the
    # cap should be raised or removed (at the cost of larger responses).
    query = f"""
    [out:json][timeout:25];
    (
      node["{key}"{op}"{val}"](around:{radius},{lat},{lon});
      way["{key}"{op}"{val}"](around:{radius},{lat},{lon});
    );
    out center 5;
    """

    try:
        resp = requests.post(
            OVERPASS_URL,
            data={'data': query},
            headers={'Accept': 'application/json', 'User-Agent': 'paradisomatch/1.0 (jonathanlooman@gmail.com)'},
            timeout=30,
        )
        if resp.status_code != 200:
            return None, f'HTTP {resp.status_code}'
        try:
            data = resp.json()
        except ValueError:
            return None, 'invalid JSON (HTML error page?)'
        elements = data.get('elements', [])

        if not elements:
            return None, None  # genuinely empty, not an error

        return _nearest_element(lat, lon, elements), None
    except requests.exceptions.ConnectTimeout:
        return None, 'connect timeout'
    except requests.exceptions.ReadTimeout:
        return None, 'read timeout'
    except requests.exceptions.ConnectionError as e:
        return None, f'connection: {str(e)[:40]}'
    except Exception as e:
        # Deliberate catch-all: one malformed response must not abort a long
        # batch run; the reason is handed back to the caller instead.
        return None, f'{type(e).__name__}: {str(e)[:40]}'


def preflight_overpass():
    """Probe the Overpass endpoint once before a batch run.

    Searches for a bakery within 5 km of central Avignon, a location that is
    expected to return data. Returns (ok, reason) so the caller can abort
    loudly instead of recording 'no amenity found' for every property.
    """
    nearest, failure = query_overpass(43.95, 4.81, 'shop=bakery', 5000)
    if failure:
        return False, f'unreachable ({failure})'
    if nearest is None:
        # No bakery at all in central Avignon means the endpoint is serving
        # empty or regional-only data.
        return False, 'endpoint reachable but returned empty data for Avignon (suspect mirror)'
    label = nearest.get("name") or "?"
    distance = nearest.get("km")
    return True, f'ok (nearest bakery: {label} {distance}km)'


def lookup_property(lat, lon):
    """Collect the nearest airport and every AMENITIES entry for one property.

    Returns (result_dict, n_queries_attempted, n_queries_failed).
    result_dict maps 'airport' and each amenity name to its lookup result
    (or None). When any query failed, up to three distinct failure reasons
    are stored under '_lookup_errors'. The failure count lets the caller tell
    a rural property with few amenities (few failures, many None results)
    apart from a backend outage (many failures).
    """
    # The airport comes from a local distance calculation, not from the API.
    found = {'airport': find_nearest_airport(lat, lon)}

    attempted = 0
    failed = 0
    distinct_reasons = []
    for name, spec in AMENITIES.items():
        attempted += 1
        found[name], problem = query_overpass(lat, lon, spec['tag'], spec['radius'])
        if problem:
            failed += 1
            if problem not in distinct_reasons:
                distinct_reasons.append(problem)
        # Pause between requests to stay under the Overpass rate limit.
        time.sleep(1.1)

    if distinct_reasons:
        found['_lookup_errors'] = distinct_reasons[:3]
    return found, attempted, failed


def main():
    """CLI entry point: pick candidate properties, look up amenities, store them.

    Candidates are either every active property with coordinates (--all) or
    the URLs listed in cyber_prairie_shortlist.json. Properties that already
    have an 'amenities' entry are skipped unless --force is given. The store
    is persisted after every enriched property so a partial run is not lost.
    """
    parser = argparse.ArgumentParser(description='Look up amenities for properties via Overpass API')
    parser.add_argument('--all', action='store_true', help='Process all active (not just shortlisted)')
    parser.add_argument('--limit', type=int, default=0, help='Max properties to process (0=all)')
    parser.add_argument('--force', action='store_true', help='Re-lookup already enriched')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be processed')
    args = parser.parse_args()

    store = load()

    # Determine candidates
    if args.all:
        candidates = [url for url, p in store.items()
                      if is_active(p) and p.get('lat') and p.get('lon')
                      and (args.force or not p.get('amenities'))]
    else:
        # Only shortlisted (load shortlist)
        shortlist_path = 'cyber_prairie_shortlist.json'
        try:
            with open(shortlist_path) as f:
                data = json.load(f)
        except FileNotFoundError:
            print("No shortlist found. Run cyber_prairie_score.py first, or use --all.")
            return

        # De-duplicate while keeping the file's order. A plain set iterates in
        # an order that varies between runs, which made `--limit N` pick a
        # different subset each time.
        shortlist_urls = list(dict.fromkeys(e['url'] for e in data.get('shortlist', [])))

        candidates = [url for url in shortlist_urls
                      if url in store and store[url].get('lat') and store[url].get('lon')
                      and (args.force or not store[url].get('amenities'))]

    if args.limit:
        candidates = candidates[:args.limit]

    print(f"Properties to look up: {len(candidates)}")

    if args.dry_run:
        for url in candidates:
            p = store[url]
            print(f"  {short_url(url)}  ({p.get('lat')}, {p.get('lon')})")
        return

    if not candidates:
        print("Nothing to do.")
        return

    # Preflight: bail loudly if Overpass is unreachable, rather than
    # silently recording 'no amenity found' for every candidate.
    print("  Preflight: checking Overpass reachability against Avignon...")
    ok, reason = preflight_overpass()
    if not ok:
        print(f"  ABORT: Overpass unreachable — {reason}")
        print(f"  Endpoint: {OVERPASS_URL}")
        print("  Fix: unblock overpass-api.de (Hetzner: 162.55.144.139, 65.109.112.52)")
        print("       in your router / DNS / ISP firewall, or set OVERPASS_URL env var")
        print("       to a working mirror. Skipping amenity lookups this run.")
        return
    print(f"  Preflight: {reason}")

    enriched = 0
    failed_geometry = 0
    total_attempted = 0
    total_failed_queries = 0
    fully_failed_properties = 0
    total = len(candidates)

    for i, url in enumerate(candidates, 1):
        p = store[url]
        loc = (p.get('location') or p.get('title') or '')[:40]
        country = (p.get('country') or '').upper()

        # A single malformed coordinate must not abort the whole batch.
        try:
            lat = float(p['lat'])
            lon = float(p['lon'])
        except (TypeError, ValueError):
            print(f"  [{i}/{total}] {loc}... SKIP (bad coords {p.get('lat')!r},{p.get('lon')!r})")
            failed_geometry += 1
            continue

        # Validate coordinates before querying
        if country and not coords_in_bounds(lat, lon, country):
            print(f"  [{i}/{total}] {loc}... SKIP (coords {lat:.2f},{lon:.2f} outside {country})")
            failed_geometry += 1
            continue

        print(f"  [{i}/{total}] {loc}...", end=' ', flush=True)

        amenities, n_att, n_fail = lookup_property(lat, lon)
        total_attempted += n_att
        total_failed_queries += n_fail

        # Don't poison the store with garbage when the API is broken mid-run
        if n_fail == n_att:
            fully_failed_properties += 1
            errs = amenities.get('_lookup_errors', ['?'])
            print(f"FAIL all {n_att} queries ({errs[0]}) — not storing")
            continue

        # Build summary for display
        parts = []
        for key in ['bakery', 'hospital', 'train_station', 'supermarket', 'airport']:
            info = amenities.get(key)
            if info:
                km = info.get('km', '?')
                parts.append(f"{key}:{km}km")
            else:
                parts.append(f"{key}:none")

        upsert(store, url, {'amenities': amenities})
        persist(store)  # incremental save — partial runs survive a timeout
        enriched += 1
        suffix = f"  [warn: {n_fail}/{n_att} queries failed]" if n_fail else ""
        print(f"OK ({', '.join(parts)}){suffix}")

    success_rate = round(100 * (total_attempted - total_failed_queries) / max(total_attempted, 1))
    print(f"\nEnriched {enriched}/{total}")
    if failed_geometry:
        # Previously counted but never shown, so skipped properties vanished
        # from the summary without explanation.
        print(f"Skipped {failed_geometry} with unusable or out-of-bounds coordinates.")
    print(f"Query success: {success_rate}% ({total_attempted - total_failed_queries}/{total_attempted} succeeded)")
    if fully_failed_properties:
        print(f"WARN: {fully_failed_properties} properties had ALL queries fail — likely upstream issue.")

    if enriched > 0:
        persist(store)


if __name__ == '__main__':
    main()
