#!/usr/bin/env python3
"""
Enrich properties with hazard data from the public Géorisques API (no token required).

For each property with lat/lon:
- Calls: https://www.georisques.gouv.fr/api/v1/rpg?lat={lat}&lon={lon}
- Collects hazard types and count
- Updates analysis_output.csv and enriched_data.json
- For a property with at least one hazard, sets risk_profile and risk_profile_objective to 'Hoog' wherever that field is still empty

Run:
  ../venv/bin/python3.14 georisques_enrich.py
"""
import json
import time
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import pandas as pd
import requests

# Input/output files, given as relative paths (resolved against the current
# working directory at run time).
ANALYSIS_CSV = Path("analysis_output.csv")
ENRICHED_JSON = Path("enriched_data.json")
# Géorisques endpoint that is queried with ``lat``/``lon`` query parameters.
API_URL = "https://www.georisques.gouv.fr/api/v1/rpg"


def extract_hazards(payload: Dict) -> Tuple[List[str], int]:
    """Collect the distinct hazard labels found in an API payload.

    The payload is searched for lists stored under ``"risques"`` and
    ``"data"``. For every dict entry in those lists, the first non-blank
    value among ``type_risque``, ``risque``, ``code_risque``, ``libelle``
    and ``intitule`` is taken as that entry's hazard label.

    Malformed input is tolerated rather than raised on: a payload that is
    not a dict yields no hazards, list entries that are not dicts are
    skipped, and values that are empty after stripping whitespace are
    ignored (the next candidate field is tried instead).

    Args:
        payload: Decoded JSON body of the API response.

    Returns:
        A tuple ``(hazards, count)`` where ``hazards`` is the sorted list of
        unique labels and ``count`` is its length.
    """
    if not isinstance(payload, dict):
        return [], 0

    # Candidate fields, in priority order; the first usable one wins.
    label_keys = ("type_risque", "risque", "code_risque", "libelle", "intitule")
    found = set()

    # The hazard list may sit under either key depending on the endpoint.
    for container_key in ("risques", "data"):
        items = payload.get(container_key)
        if not isinstance(items, list):
            continue
        for item in items:
            if not isinstance(item, dict):
                continue
            for label_key in label_keys:
                value = item.get(label_key)
                if not value:
                    continue
                label = str(value).strip()
                if label:
                    found.add(label)
                    break

    hazards = sorted(found)
    return hazards, len(hazards)


def fetch_georisques(lat: float, lon: float) -> Tuple[List[str], int, Optional[Dict]]:
    """Query the hazard API for one coordinate pair.

    The request is sent to ``API_URL`` with ``lat`` and ``lon`` as query
    parameters and a 20 second timeout. This is a best-effort lookup: any
    non-200 status and any exception raised while requesting, decoding or
    parsing the response produces the "nothing found" result instead of
    propagating.

    Args:
        lat: Latitude passed to the API.
        lon: Longitude passed to the API.

    Returns:
        ``(hazards, count, payload)`` on success, where ``payload`` is the
        decoded JSON body; ``([], 0, None)`` on any failure.
    """
    try:
        response = requests.get(
            API_URL,
            params={"lat": lat, "lon": lon},
            timeout=20,
        )
        if response.status_code == 200:
            body = response.json()
            labels, total = extract_hazards(body)
            return labels, total, body
    except Exception:
        # Deliberately broad: a failed lookup must never abort the whole run.
        return [], 0, None
    # Reached only for a non-200 response.
    return [], 0, None


def update_csv(hazard_map: Dict[str, Tuple[List[str], int]]) -> int:
    """Write the collected hazard data into the analysis CSV.

    Rows are matched on the ``URL`` column. For every matching row the
    ``georisques_types`` (labels joined with ``;``) and ``georisques_count``
    columns are set, and ``risk_profile`` / ``risk_profile_objective`` are
    set to ``"Hoog"`` when the row has at least one hazard and the field is
    still empty. Missing columns are created. The file is rewritten in
    place even when no row matched.

    Args:
        hazard_map: Maps a property URL to ``(hazard_labels, hazard_count)``.

    Returns:
        The number of rows updated, or 0 when the CSV file does not exist.
    """
    if not ANALYSIS_CSV.exists():
        print(f"❌ {ANALYSIS_CSV} not found.")
        return 0

    df = pd.read_csv(ANALYSIS_CSV)
    for col in ["georisques_types", "georisques_count", "risk_profile", "risk_profile_objective"]:
        if col not in df.columns:
            df[col] = None

    # A text column that exists but is entirely empty is read back as float64
    # (all NaN). Assigning a string into such a column is deprecated in
    # pandas 2.x and rejected by newer releases, so make these columns
    # object-typed before writing into them.
    for col in ("georisques_types", "risk_profile", "risk_profile_objective"):
        df[col] = df[col].astype(object)

    # Without a URL column nothing can match; the file is still rewritten so
    # the newly added columns are persisted.
    urls = df["URL"] if "URL" in df.columns else pd.Series(dtype=object)

    updated = 0
    for idx, url in urls.items():
        if url not in hazard_map:
            continue
        hazards, count = hazard_map[url]
        df.at[idx, "georisques_types"] = ";".join(hazards)
        df.at[idx, "georisques_count"] = count
        if count > 0:
            # Only fill in a risk profile; never overwrite an existing one.
            for col in ("risk_profile", "risk_profile_objective"):
                if pd.isna(df.at[idx, col]):
                    df.at[idx, col] = "Hoog"
        updated += 1

    df.to_csv(ANALYSIS_CSV, index=False, encoding="utf-8")
    return updated


def update_json(hazard_map: Dict[str, Tuple[List[str], int]], raw_map: Dict[str, Dict]):
    """Write the collected hazard data into the enriched JSON file.

    Entries are matched on their ``"url"`` key. Each matching entry gets
    ``georisques_types`` (list of labels) and ``georisques_count``; when the
    count is positive, ``risk_profile`` and ``risk_profile_objective`` are
    set to ``"Hoog"`` if they are missing or falsy. The raw API payload is
    stored under ``georisques_raw`` when one is available for that URL. The
    file is rewritten in place even when no entry matched.

    Args:
        hazard_map: Maps a property URL to ``(hazard_labels, hazard_count)``.
        raw_map: Maps a property URL to the raw API payload.

    Returns:
        The number of entries updated, or 0 when the JSON file does not exist.
    """
    if not ENRICHED_JSON.exists():
        print(f"❌ {ENRICHED_JSON} not found.")
        return 0

    records = json.loads(ENRICHED_JSON.read_text(encoding="utf-8"))

    touched = 0
    for record in records:
        key = record.get("url")
        if key not in hazard_map:
            continue

        hazard_types, hazard_total = hazard_map[key]
        record["georisques_types"] = hazard_types
        record["georisques_count"] = hazard_total

        if hazard_total > 0:
            # Fill in a risk profile only where none has been set yet.
            for field in ("risk_profile", "risk_profile_objective"):
                if not record.get(field):
                    record[field] = "Hoog"

        if key in raw_map:
            record["georisques_raw"] = raw_map[key]
        touched += 1

    serialized = json.dumps(records, ensure_ascii=False, indent=2)
    ENRICHED_JSON.write_text(serialized, encoding="utf-8")
    return touched


def main():
    """Look up hazards for every property with coordinates and save them.

    Reads the analysis CSV, queries the hazard API once per row that has a
    URL and both coordinates, then passes the collected results to
    ``update_csv`` and ``update_json``.

    Lookups that fail (``fetch_georisques`` returns ``None`` as payload) are
    not recorded, so a transient API error cannot overwrite existing hazard
    data with "0 hazards"; the number of failures is reported instead.
    """
    if not ANALYSIS_CSV.exists():
        print(f"❌ {ANALYSIS_CSV} not found in {ANALYSIS_CSV.parent}")
        return

    df = pd.read_csv(ANALYSIS_CSV)
    missing_cols = [c for c in ("URL", "Latitude", "Longitude") if c not in df.columns]
    if missing_cols:
        print(f"❌ {ANALYSIS_CSV} is missing required column(s): {', '.join(missing_cols)}")
        return

    targets = df[df["Latitude"].notna() & df["Longitude"].notna()]
    print(f"🌍 Properties with coords: {len(targets)}")

    hazard_map = {}
    raw_map = {}
    requests_made = 0
    failed = 0

    for _, row in targets.iterrows():
        url = row["URL"]
        if pd.isna(url):
            # Without a URL the result could never be matched back to a row.
            continue
        try:
            latf = float(row["Latitude"])
            lonf = float(row["Longitude"])
        except (TypeError, ValueError):
            continue

        hazards, count, raw = fetch_georisques(latf, lonf)
        requests_made += 1
        if raw is None:
            failed += 1
        else:
            hazard_map[url] = (hazards, count)
            if raw:
                raw_map[url] = raw

        # Throttle on the number of requests actually sent; the DataFrame
        # index labels of the filtered frame are not consecutive.
        if requests_made % 20 == 0:
            time.sleep(1)  # be gentle to the API

    if failed:
        print(f"⚠️ Géorisques lookup failed for {failed} properties; their existing data was left untouched")

    csv_updated = update_csv(hazard_map)
    json_updated = update_json(hazard_map, raw_map)
    print(f"✅ Updated CSV hazards for {csv_updated} properties")
    print(f"✅ Updated JSON hazards for {json_updated} properties")
    print("Re-run quality_gate.py to verify coverage.")


if __name__ == "__main__":
    main()
