#!/usr/bin/env python3
"""Stage-3 detail-page enrichment — the rich read.

Fetches the listing detail page (Green-Acres + Immonot; Leggett/Properstar are
bot-blocked) and extracts what the search-card data lacks:
  - bedrooms        (the B&B-capacity gate needs this; cards only have `rooms`)
  - description     (full text — lights up the typology/trees/pool/character
                     keyword scorers that are starved on card snippets)
  - renovation_score (1-5 from condition keywords — stops ruins ranking #1)

Writes these to the store. Run on the shortlist after scoring, then re-score.

Usage:
    python3 detail_enrich.py --top 30
    python3 detail_enrich.py --url <URL>
"""
from __future__ import annotations

import argparse
import html
import json
import re
import sys
import time
from collections import Counter
from pathlib import Path

import requests

SCRIPT_DIR = Path(__file__).resolve().parent
if str(SCRIPT_DIR) not in sys.path:
    sys.path.insert(0, str(SCRIPT_DIR))

from store import load, persist, upsert  # noqa: E402

# Browser-like User-Agent header value sent with each detail-page request.
UA = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
      'AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15')

# "<digits> bedroom|chambre|slaapkamer", case-insensitive; group 1 is the count.
# The pattern is unanchored, so plural forms ("bedrooms", "chambres") match too.
RE_BEDROOMS = re.compile(r'(\d+)\s*(?:bedroom|chambre|slaapkamer)', re.I)
# Captures the raw content attribute of the og:description meta tag. Matches
# only this exact attribute order with double quotes; the capture is the raw
# attribute value as it appears in the HTML source.
RE_OG_DESC = re.compile(r'<meta property="og:description" content="([^"]+)"')

# Condition keyword banks -> renovation_score (1=ruin .. 5=move-in ready)
# Entries are lower-case and are tested as plain substrings of lower-cased
# text by condition_score(), so short entries also hit longer words
# (e.g. 'ruin' also matches 'ruine' and 'ruins').
# NOTE(review): 'inhabitable' is a substring of 'uninhabitable' (so one of the
# two is redundant), and in English 'inhabitable' means fit to live in —
# presumably the French sense (unfit to live in) is intended; confirm.
COND_RUIN = ['à restaurer', 'a restaurer', 'to restore', 'to renovate', 'à rénover',
             'a renover', 'ruine', 'ruin', 'gros œuvre', 'gros oeuvre', 'renovation project',
             'needs renovation', 'needs full', 'entièrement à rénover', 'tout à refaire',
             'uninhabitable', 'inhabitable', 'to finish', 'à terminer']
# Habitable but with significant work flagged -> score 2.
COND_HEAVY = ['travaux à prévoir', 'work required', 'needs work', 'travaux importants',
              'rafraîchir', 'to modernise', 'to modernize', 'rénovation partielle']
# Already renovated / ready to move in -> score 5.
COND_GOOD = ['renovated', 'rénové', 'renove', 'restored', 'restauré', 'restaure',
             'move-in', 'turnkey', 'clé en main', 'cle en main', 'refait à neuf',
             'tastefully', 'beautifully renovated', 'fully renovated',
             'ready to move', 'immaculate', 'excellent condition', 'très bon état']


def condition_score(text: str) -> int | None:
    """Map free text to a renovation score using the condition keyword banks.

    The text is lower-cased and each bank is tested by substring match, worst
    condition first, so a ruin keyword outranks any "renovated" keyword found
    in the same text.

    Args:
        text: Listing text to inspect; may be empty or None.

    Returns:
        None when there is no text at all; otherwise 1 (COND_RUIN hit),
        2 (COND_HEAVY hit), 5 (COND_GOOD hit), or 3 when no keyword matches.
    """
    lowered = text.lower() if text else ''
    if not lowered:
        return None

    # Order matters: the first bank with a hit decides the score.
    ranked_banks = ((COND_RUIN, 1), (COND_HEAVY, 2), (COND_GOOD, 5))
    for keywords, score in ranked_banks:
        for keyword in keywords:
            if keyword in lowered:
                return score

    # Text present but no keyword: assume habitable-with-some-work.
    return 3


def enrich_one(url: str) -> dict | None:
    """Fetch a listing detail page and extract the enrichment fields.

    Args:
        url: Listing URL. Only URLs containing 'green-acres' or 'immonot'
            are fetched.

    Returns:
        None when the URL is not from a fetchable source, the request raises
        ``requests.RequestException``, or the response status is not 200.
        Otherwise a dict (possibly empty) with whichever of these keys could
        be extracted:
          - 'bedrooms': most frequent plausible (1..12) bedroom count found
            on the page; ties go to the count that appears first.
          - 'description': og:description text with HTML entities decoded,
            truncated to 600 characters.
          - 'renovation_score': ``condition_score`` of the description, or of
            the first 8000 characters of the page when there is no
            description.
    """
    if 'green-acres' not in url and 'immonot' not in url:
        return None  # only fetchable sources
    try:
        r = requests.get(url, headers={'User-Agent': UA, 'Accept-Language': 'en'}, timeout=20)
    except requests.RequestException:
        return None
    if r.status_code != 200:
        return None
    page = r.text
    out = {}

    # bedrooms - modal plausible match (1..12). Counter breaks ties by first
    # occurrence in the page rather than by set iteration order.
    counts = Counter(n for n in map(int, RE_BEDROOMS.findall(page)) if 1 <= n <= 12)
    if counts:
        out['bedrooms'] = counts.most_common(1)[0][0]

    # description - og:description (concise, reliable). The attribute value is
    # HTML-escaped in the page source; decode it before truncating so an
    # entity is never cut in half and accented keywords can match.
    m = RE_OG_DESC.search(page)
    desc = html.unescape(m.group(1)).strip() if m else ''
    if desc:
        out['description'] = desc[:600]

    # condition -> renovation_score; fall back to the head of the page only
    # when there is no description to score.
    cs = condition_score(desc)
    if cs is None:
        cs = condition_score(html.unescape(page[:8000]))
    if cs is not None:
        out['renovation_score'] = cs

    return out


def main():
    """CLI entry point: enrich shortlist URLs (or a single --url) and persist.

    With ``--url`` only that URL is processed. Otherwise the shortlist JSON
    next to this script is read, sorted by ``cp_score`` (falling back to
    ``score``) in descending order, and the first ``--top`` URLs are processed.
    URLs not already present in the store are skipped. The store is persisted
    once, after the loop.

    Returns:
        0, used as the process exit code.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument('--top', type=int, default=30)
    ap.add_argument('--url')
    args = ap.parse_args()

    store = load()
    if args.url:
        targets = [args.url]
    else:
        # Context manager closes the handle; JSON is read explicitly as UTF-8
        # instead of the platform default encoding.
        with open(SCRIPT_DIR / 'cyber_prairie_shortlist.json', encoding='utf-8') as fh:
            s = json.load(fh)
        # Accept a bare list, a {'shortlist': [...]} wrapper, or a dict whose
        # values are the entries; anything else yields no targets.
        if isinstance(s, list):
            items = s
        elif isinstance(s, dict):
            items = s.get('shortlist', list(s.values()))
        else:
            items = []
        items = sorted(
            items,
            key=lambda p: float(p.get('cp_score', p.get('score', 0)) or 0),
            reverse=True,
        )
        targets = [it['url'] for it in items[:args.top]]

    done = 0
    for url in targets:
        if url not in store:
            continue
        fields = enrich_one(url)
        if fields:
            upsert(store, url, fields)
            done += 1
            p = store[url]
            print(f"  {(p.get('city') or p.get('search_region') or '?')[:26]:26} "
                  f"beds={fields.get('bedrooms','?')} reno={fields.get('renovation_score','?')}")
        # Pause between listings to stay polite to the source sites.
        time.sleep(1.0)
    persist(store)
    print(f"\nDetail-enriched {done}/{len(targets)}")
    return 0


if __name__ == '__main__':
    sys.exit(main())
