#!/usr/bin/env python3
"""Community vitality — commune population via geo.api.gouv.fr (INSEE-sourced, free, no auth).

The research's #1 theme: a dying/too-remote commune kills a retreat (Limousin
projects relocated for lack of footfall); a healthy village = services + guest
access. Population is the accessible proxy. Writes `commune_population` to the
store; cyber_prairie_score.community_vitality_adjustment() scores it (reward the
500-10k 'alive village' band, penalise too-remote <150 and urban >25k).

Usage:
    python3 community_vitality.py --top 30      # enrich top N shortlisted
    python3 community_vitality.py --url <URL>
"""
from __future__ import annotations

import argparse
import json
import sys
import time
from pathlib import Path

import requests

SCRIPT_DIR = Path(__file__).resolve().parent
if str(SCRIPT_DIR) not in sys.path:
    sys.path.insert(0, str(SCRIPT_DIR))

from store import load, persist, upsert  # noqa: E402

# Base endpoint for commune lookups (queried by name or by coordinate below).
GEO_API = 'https://geo.api.gouv.fr/communes'

# Maps a listing's `search_region` slug to its INSEE département code.
# Codes are kept as strings so the leading zero in e.g. '07' survives.
DEPT_CODE = {
    'drome': '26',
    'ardeche': '07',
    'herault': '34',
    'gard': '30',
    'cotes-d-armor': '22',
    'morbihan': '56',
    'finistere': '29',
    'dordogne': '24',
    'lot': '46',
    'creuse': '23',
    'correze': '19',
    'charente': '16',
    'charente-maritime': '17',
    'cantal': '15',
    'aveyron': '12',
    'lozere': '48',
    'tarn': '81',
    'tarn-et-garonne': '82',
    'pyrenees-atlantiques': '64',
    'hautes-pyrenees': '65',
    'aude': '11',
    'pyrenees-orientales': '66',
    'gers': '32',
    'mayenne': '53',
    'orne': '61',
    'manche': '50',
    'vendee': '85',
    'vienne': '86',
    'calvados': '14',
}


def _clean_commune(name: str) -> str:
    """Strip region suffix: 'Tarbes (Hautes-Pyrénées)' → 'Tarbes'."""
    if not name:
        return ''
    return name.split('(')[0].strip()


def fetch_population(commune: str, dept_code: str | None) -> int | None:
    """Look up a commune's population by name through the GEO_API endpoint.

    Args:
        commune: Commune name; any parenthesised suffix is stripped first.
        dept_code: Optional département code used to narrow the search.

    Returns:
        The population of the first candidate whose name equals the cleaned
        name (case-insensitively) and has a non-zero population; otherwise
        the population of the first candidate. None when the name is empty,
        the lookup fails, or no usable population is present.

    At most two attempts are made. Transport errors, undecodable bodies and
    HTTP 429/5xx responses are retried once after a one-second pause; any
    other non-200 status gives up immediately.
    """
    name = _clean_commune(commune)
    if not name:
        return None
    params = {'nom': name, 'fields': 'nom,population,codeDepartement', 'limit': 5}
    if dept_code:
        params['codeDepartement'] = dept_code

    data = None
    for attempt in (1, 2):
        try:
            r = requests.get(GEO_API, params=params, timeout=15)
            if r.status_code == 200:
                data = r.json()
                break
            # Rate limiting and server-side errors may clear up on a second
            # try; other statuses (bad request, not found, ...) will not.
            retryable = r.status_code == 429 or r.status_code >= 500
        except (requests.RequestException, ValueError):
            # ValueError covers a body that is not valid JSON; requests
            # releases before 2.27 raise it outside the RequestException tree.
            retryable = True
        if not retryable or attempt == 2:
            return None
        time.sleep(1.0)

    # Only a list of objects is usable; anything else counts as "no result"
    # instead of crashing on indexing.
    if not isinstance(data, list):
        return None
    candidates = [c for c in data if isinstance(c, dict)]
    if not candidates:
        return None

    # Exact-ish name match preferred, else first candidate.
    wanted = name.lower()
    for c in candidates:
        if (c.get('nom') or '').lower() == wanted and c.get('population'):
            return int(c['population'])
    first = candidates[0].get('population')
    return int(first) if first else None


def fetch_population_by_latlon(lat, lon) -> int | None:
    """Population of the commune CONTAINING a coordinate — point lookup, no name
    matching. Reliable fallback when the commune name doesn't resolve (foreign
    listing portals, accented/variant names).

    Args:
        lat: Latitude, passed to the API as-is; None disables the lookup.
        lon: Longitude, passed to the API as-is; None disables the lookup.

    Returns:
        The population of the first commune returned, or None when a
        coordinate is missing, the request fails, the response is not HTTP
        200 / not decodable / not a list of objects, or the population is
        absent or zero. A single attempt is made (no retry).
    """
    if lat is None or lon is None:
        return None
    try:
        r = requests.get(GEO_API, params={'lat': lat, 'lon': lon, 'fields': 'nom,population'}, timeout=15)
        if r.status_code != 200:
            return None
        data = r.json()
    except (requests.RequestException, ValueError):
        # ValueError covers a body that is not valid JSON; requests releases
        # before 2.27 raise it outside the RequestException tree.
        return None
    # Guard the payload shape so an unexpected body (e.g. an error object)
    # yields None instead of a KeyError/TypeError on data[0].
    if not isinstance(data, list) or not data or not isinstance(data[0], dict):
        return None
    population = data[0].get('population')
    return int(population) if population else None


def _load_shortlist_urls(top: int) -> list[str]:
    """Return the URLs of the `top` best-scored entries of the shortlist file.

    The file may hold a bare list, a dict with a 'shortlist' key, or a dict
    whose values are the entries. Any other shape yields an empty list.
    Entries that are not dicts or carry no 'url' key are skipped.
    """
    # Context manager closes the handle; JSON is UTF-8 regardless of locale.
    with open(SCRIPT_DIR / 'cyber_prairie_shortlist.json', encoding='utf-8') as fh:
        s = json.load(fh)
    if isinstance(s, list):
        items = s
    elif isinstance(s, dict):
        items = s['shortlist'] if 'shortlist' in s else list(s.values())
    else:
        items = []
    if not isinstance(items, list):
        items = []
    entries = [it for it in items if isinstance(it, dict) and 'url' in it]
    # Highest score first; 'cp_score' wins over 'score', missing/None counts as 0.
    entries.sort(key=lambda p: -float(p.get('cp_score', p.get('score', 0)) or 0))
    return [it['url'] for it in entries[:top]]


def main() -> int:
    """Enrich stored listings with `commune_population` and persist the store.

    Targets are either the single `--url` or the `--top` N shortlist entries.
    Listings missing from the store or whose country is not FR are skipped.
    For each remaining listing the population is looked up by name, then by
    coordinate if the name lookup returned nothing. Returns 0.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument('--top', type=int, default=30)
    ap.add_argument('--url')
    args = ap.parse_args()

    store = load()
    targets = [args.url] if args.url else _load_shortlist_urls(args.top)

    done = 0
    for url in targets:
        p = store.get(url)
        if not p:
            continue
        if (p.get('country') or 'FR').upper() != 'FR':
            continue  # geo.api is France-only
        region = (p.get('search_region') or '').strip().lower()
        dept = DEPT_CODE.get(region)
        # NOTE(review): when 'city' is missing the search_region slug is used
        # as the commune name — confirm that this is intended, since a match
        # on it prevents the coordinate fallback from running.
        pop = fetch_population(p.get('city') or p.get('search_region') or '', dept)
        if pop is None:  # name lookup failed → resolve commune by coordinate
            pop = fetch_population_by_latlon(p.get('lat'), p.get('lon'))
        if pop is not None:
            upsert(store, url, {'commune_population': pop})
            done += 1
            band = ('too-remote' if pop < 150 else 'small' if pop < 500
                    else 'alive village' if pop <= 10000 else 'town' if pop <= 25000 else 'urban')
            print(f'  {(p.get("city") or "?")[:30]:30} pop={pop:>6} ({band})')
        time.sleep(0.6)  # pause between listings to stay gentle on the API
    persist(store)
    print(f'\nEnriched {done}/{len(targets)} with commune population')
    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
