#!/usr/bin/env python3
"""Tier 2 urban detection — parcel-precise building density.

The definitive 'is this property in a town centre' signal. Two steps:
1. extract_parcel_coords(url): fetch the listing detail page and pull the
   property's own lat/lon (more precise than the commune-centre coords that
   geocoding produces from a city name).
2. building_density(lat, lon): count buildings within a radius via Overpass.
   A homestead has few buildings nearby; a town centre has many.

This supersedes the coarse name-list in cyber_prairie_score.compute_urban_penalty
for any property we've fetched a detail page for. Writes `building_density`,
`parcel_lat`/`parcel_lon`, `privacy_buildings_50m` and `major_road_m` to the
store; the scorer reads building_density.

Usage:
    python3 urban_density.py --top 30      # enrich top N shortlisted
    python3 urban_density.py --url <URL>   # single property
"""
from __future__ import annotations

import argparse
import json
import re
import sys
import time
from pathlib import Path

import requests

# Directory containing this script. Also used by main() to locate
# cyber_prairie_shortlist.json.
SCRIPT_DIR = Path(__file__).resolve().parent
# Put the script directory at the front of sys.path so the sibling `store`
# module imports regardless of the current working directory.
if str(SCRIPT_DIR) not in sys.path:
    sys.path.insert(0, str(SCRIPT_DIR))

# Must come after the sys.path tweak above, hence the E402 suppression.
from store import load, persist, upsert  # noqa: E402

# Browser-like User-Agent sent by extract_parcel_coords() when fetching
# listing detail pages.
UA = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
      'AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15')
# Overpass endpoint that all building / road queries in this module POST to.
OVERPASS_URL = 'https://overpass-api.de/api/interpreter'

# Per-source coordinate extraction from detail-page HTML (verified 2026-05-28)
# Key:   substring looked for anywhere in the listing URL.
# Value: [latitude_regex, longitude_regex]; each regex has exactly one capture
#        group holding a signed decimal number (a decimal point is required,
#        so integer-only coordinates will not match).
# Dict order matters: the first key found in the URL wins.
COORD_PATTERNS = {
    'green-acres.': [r'latitude["\s:=]+(-?\d+\.\d+)', r'longitude["\s:=]+(-?\d+\.\d+)'],
    'immonot.com': [r'"lat(?:itude)?"\s*[:=]\s*(-?\d+\.\d+)', r'"l(?:ng|on|ongitude)"\s*[:=]\s*(-?\d+\.\d+)'],
}


def extract_parcel_coords(url: str) -> tuple[float, float] | tuple[None, None]:
    """Scrape the property's own coordinates from its listing detail page.

    The URL is matched (by substring) against the keys of COORD_PATTERNS to
    pick a latitude/longitude regex pair; the page is then fetched and both
    regexes are searched in the response text.

    Only handles directly-fetchable sources (Green-Acres, Immonot). Leggett +
    Properstar are bot-blocked; for those we keep the commune-centre coords.

    Returns:
        (lat, lon) as floats when both regexes match, otherwise (None, None):
        unknown source, request error, non-200 response, or no match.
    """
    missing = (None, None)

    # First configured source whose key occurs in the URL, if any.
    patterns = next(
        (pats for host, pats in COORD_PATTERNS.items() if host in url),
        None,
    )
    if not patterns:
        return missing

    try:
        resp = requests.get(
            url,
            headers={'User-Agent': UA, 'Accept-Language': 'fr-FR'},
            timeout=15,
        )
        if resp.status_code != 200:
            return missing
        html = resp.text
    except requests.RequestException:
        # Best-effort scrape: network problems just mean "no parcel coords".
        return missing

    lat_hit = re.search(patterns[0], html)
    lon_hit = re.search(patterns[1], html)
    if lat_hit is None or lon_hit is None:
        return missing
    return float(lat_hit.group(1)), float(lon_hit.group(1))


def building_density(lat: float, lon: float, radius: int = 250) -> int | None:
    """Count buildings within `radius` metres via Overpass. None on failure.

    Sends an `out count` query for ways and relations tagged `building`
    around (lat, lon) and returns the reported total.

    Args:
        lat: Latitude of the centre point, in degrees.
        lon: Longitude of the centre point, in degrees.
        radius: Search radius in metres.

    Returns:
        The building count, or None when the request fails, the response is
        not HTTP 200, the payload is not the expected JSON shape, or Overpass
        reports a runtime error instead of a result.
    """
    q = (f'[out:json][timeout:25];'
         f'(way["building"](around:{radius},{lat},{lon});'
         f'relation["building"](around:{radius},{lat},{lon}););out count;')
    try:
        r = requests.post(OVERPASS_URL, data={'data': q},
                          headers={'Accept': 'application/json',
                                   'User-Agent': 'paradisomatch/1.0 (jonathanlooman@gmail.com)'},
                          timeout=30)
        if r.status_code != 200:
            return None
        data = r.json()
        # Overpass 'out count' returns a single element with tags.total
        els = data.get('elements', [])
        if els and 'tags' in els[0]:
            return int(els[0]['tags'].get('total', 0))
        # Overpass can answer HTTP 200 with an empty element list plus a
        # "remark" (e.g. a query timeout). That is a failed lookup, not
        # "zero buildings" -- reporting 0 would label the parcel as rural.
        if not els and data.get('remark'):
            return None
        return len(els)
    except (requests.RequestException, ValueError, KeyError,
            TypeError, AttributeError):
        # TypeError/AttributeError cover a JSON payload that is not shaped
        # like {"elements": [{...}]} (e.g. a top-level list or null tags).
        return None


def density_label(n: int) -> str:
    """Translate a building count into a coarse settlement-density label.

    Band upper bounds are inclusive: up to 8 is 'isolated/rural', up to 30 is
    'hamlet/village-edge', up to 80 is 'dense village / small-town', and
    anything above 80 is 'town/city core'.
    """
    bands = (
        (8, 'isolated/rural'),
        (30, 'hamlet/village-edge'),
        (80, 'dense village / small-town'),
    )
    for upper_bound, label in bands:
        if n <= upper_bound:
            return label
    return 'town/city core'


def _haversine_m(lat1, lon1, lat2, lon2):
    """Great-circle distance in metres between two points given in degrees.

    Haversine formula on a sphere of radius 6 371 000 m.
    """
    from math import asin, cos, radians, sin, sqrt

    earth_radius_m = 6371000
    delta_lat = radians(lat2 - lat1)
    delta_lon = radians(lon2 - lon1)
    cos_product = cos(radians(lat1)) * cos(radians(lat2))
    hav = sin(delta_lat / 2) ** 2 + cos_product * sin(delta_lon / 2) ** 2
    return earth_radius_m * 2 * asin(sqrt(hav))


def privacy_buildings(lat: float, lon: float, radius: int = 50) -> int | None:
    """Count buildings within `radius` m of the parcel. The property's own
    house/barn(s) count, so 1-2 ≈ private, 3-4 ≈ a close neighbour or two,
    5+ ≈ hemmed in. Tier-1 privacy signal (adjacent neighbours killed review #2).

    This is the same Overpass building count as building_density(), just with
    a much tighter default radius, so it delegates to that function instead of
    carrying a second copy of the query and response handling.

    Args:
        lat: Latitude of the parcel, in degrees.
        lon: Longitude of the parcel, in degrees.
        radius: Search radius in metres.

    Returns:
        The building count, or None when the Overpass lookup fails.
    """
    return building_density(lat, lon, radius=radius)


def _segment_distance_m(lat: float, lon: float,
                        lat1: float, lon1: float,
                        lat2: float, lon2: float) -> float:
    """Distance (m) from (lat, lon) to the segment (lat1, lon1)-(lat2, lon2).

    Projects both endpoints onto a local flat plane centred on the query point
    (equirectangular approximation, sphere radius 6 371 000 m), then does a
    plain 2-D point-to-segment computation. A zero-length segment degrades to
    point-to-point distance.
    """
    import math

    earth_radius_m = 6371000
    # East-west metres per radian of longitude shrink with cos(latitude).
    lon_scale = earth_radius_m * math.cos(math.radians(lat))
    ax = math.radians(lon1 - lon) * lon_scale
    ay = math.radians(lat1 - lat) * earth_radius_m
    bx = math.radians(lon2 - lon) * lon_scale
    by = math.radians(lat2 - lat) * earth_radius_m

    dx, dy = bx - ax, by - ay
    seg_len_sq = dx * dx + dy * dy
    if seg_len_sq == 0.0:
        return math.hypot(ax, ay)
    # Parameter of the projection of the origin onto A->B, clamped to the
    # segment so we never measure to a point beyond either endpoint.
    t = max(0.0, min(1.0, -(ax * dx + ay * dy) / seg_len_sq))
    return math.hypot(ax + t * dx, ay + t * dy)


def nearest_major_road_m(lat: float, lon: float, radius: int = 300) -> float | None:
    """Distance (m) to the nearest motorway/trunk/primary/secondary road, or
    None if no major road within `radius`. Tier-1 road-noise signal (busy road
    killed review #4).

    The distance is measured to the road's line segments, not just to its
    vertices: a straight road can have nodes hundreds of metres apart, so the
    nearest vertex may be far away even when the carriageway itself is close.

    Args:
        lat: Latitude of the parcel, in degrees.
        lon: Longitude of the parcel, in degrees.
        radius: Search radius in metres passed to the Overpass `around` filter.

    Returns:
        The distance rounded to the nearest metre, or None. Note that None is
        returned both when no matching road is found and when the lookup fails
        (request error, non-200 response, malformed payload); callers cannot
        tell these apart from the return value alone.
    """
    q = (f'[out:json][timeout:25];'
         f'way["highway"~"^(motorway|trunk|primary|secondary)$"](around:{radius},{lat},{lon});'
         f'out geom;')
    try:
        r = requests.post(OVERPASS_URL, data={'data': q},
                          headers={'Accept': 'application/json',
                                   'User-Agent': 'paradisomatch/1.0 (jonathanlooman@gmail.com)'},
                          timeout=30)
        if r.status_code != 200:
            return None
        els = r.json().get('elements', [])
        best = None
        for way in els:
            pts = way.get('geometry') or []
            # Pair consecutive vertices into segments. A single-vertex way
            # is paired with itself, giving a zero-length segment.
            for a, b in zip(pts, pts[1:] or pts):
                d = _segment_distance_m(lat, lon,
                                        a['lat'], a['lon'], b['lat'], b['lon'])
                if best is None or d < best:
                    best = d
        return round(best) if best is not None else None
    except (requests.RequestException, ValueError, KeyError,
            TypeError, AttributeError):
        # TypeError/AttributeError cover payloads that are not shaped like
        # {"elements": [{"geometry": [{"lat": .., "lon": ..}, ...]}]}.
        return None


def main():
    """CLI entry point: enrich stored properties with density/privacy/road data.

    Targets are either the single `--url`, or the `--top` N highest-scoring
    entries of cyber_prairie_shortlist.json (next to this script). For each
    target already present in the store it:

    1. tries to scrape parcel coordinates from the detail page, falling back
       to the stored `lat`/`lon`;
    2. queries Overpass for building density, near-neighbour building count
       and distance to the nearest major road;
    3. upserts whatever was obtained, then persists the store once at the end.

    One-second pauses separate the network calls.

    Returns:
        0, used as the process exit status.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument('--top', type=int, default=30, help='Enrich top N shortlisted')
    ap.add_argument('--url', help='Single property URL')
    args = ap.parse_args()

    store = load()

    if args.url:
        targets = [args.url]
    else:
        # Close the file deterministically and read it as UTF-8 (the JSON
        # default) rather than the platform's locale encoding.
        with open(SCRIPT_DIR / 'cyber_prairie_shortlist.json', encoding='utf-8') as fh:
            s = json.load(fh)
        # Accept a bare list, {"shortlist": [...]}, or a dict of items.
        # Anything else yields no targets instead of an AttributeError.
        if isinstance(s, list):
            items = s
        elif isinstance(s, dict):
            items = s.get('shortlist', list(s.values()))
        else:
            items = []
        ranked = sorted(
            items,
            key=lambda p: float(p.get('cp_score', p.get('score', 0)) or 0),
            reverse=True,
        )
        # Skip shortlist entries without a URL: they cannot be looked up.
        targets = [it['url'] for it in ranked[:args.top] if it.get('url')]

    done = 0
    for url in targets:
        p = store.get(url)
        if not p:
            continue
        # Prefer parcel coords from detail page; fall back to stored coords
        plat, plon = extract_parcel_coords(url)
        if plat:
            upsert(store, url, {'parcel_lat': plat, 'parcel_lon': plon})
            lat, lon = plat, plon
            time.sleep(1.0)
        else:
            lat, lon = p.get('lat'), p.get('lon')
        # A missing longitude would otherwise be formatted into the Overpass
        # query as the literal text "None".
        if not lat or lon is None:
            print(f'  no coords: {p.get("city") or url[:50]}')
            continue
        n = building_density(lat, lon)
        fields = {}
        if n is not None:
            fields['building_density'] = n
        time.sleep(1.0)
        # Tier-1: privacy (nearest buildings) + road noise (nearest major road)
        priv = privacy_buildings(lat, lon)
        if priv is not None:
            fields['privacy_buildings_50m'] = priv
        time.sleep(1.0)
        road = nearest_major_road_m(lat, lon)
        # None normally means "no major road within 300m" (good), but the
        # lookup also returns None on failure. If both building lookups
        # failed too, Overpass is almost certainly unreachable, so do not
        # overwrite the stored value with a misleading "no road nearby".
        if road is not None or n is not None or priv is not None:
            fields['major_road_m'] = road
        if fields:
            upsert(store, url, fields)
            done += 1
            urban = '  <-- URBAN' if (n or 0) > 80 else ''
            privf = f' priv={priv}' if priv is not None else ''
            roadf = f' road={road}m' if road is not None else ' road=>300m'
            print(f'  {(p.get("city") or "?")[:28]:28} dens={n if n is not None else "?":>3}{privf}{roadf}{urban}')
        else:
            print(f'  lookup failed: {p.get("city") or url[:50]}')
        time.sleep(1.0)

    persist(store)
    print(f'\nEnriched {done}/{len(targets)} with building density')
    return 0


if __name__ == '__main__':
    sys.exit(main())
