#!/usr/bin/env python3
"""
frontier.py — surface the "diamonds from the rough" (V2-0a + V2-0b).

Over the comparable (vetted) set, compute value ratios and the Pareto
non-dominated frontier on (price down, CP score up). The frontier IS the
best-price/quality set: no property on it is matched or beaten on both price
and CP score by another property that is strictly better on at least one.

Completeness gate: a property must have price + land_m2 + a CP score to be
ranked. Incomplete ones are listed separately, never silently dropped.

Usage:
    python3 frontier.py            # vetted set (default)
    python3 frontier.py --all      # whole shortlist (incl. incomplete, flagged)
"""
import argparse
import json
from pathlib import Path

DIR = Path(__file__).parent


def load():
    """Read the shortlist and the property store from the script directory.

    Returns:
        A ``(shortlist, store)`` tuple: the value under the ``'shortlist'``
        key of ``cyber_prairie_shortlist.json`` and the whole parsed content
        of ``properties.json``.

    Raises:
        OSError: if either file cannot be read.
        json.JSONDecodeError: if either file is not valid JSON.
        KeyError: if the shortlist file has no ``'shortlist'`` key.
    """
    # Decode explicitly as UTF-8: read_text() without an encoding uses the
    # platform locale, which mis-decodes non-ASCII text on non-UTF-8 systems.
    sl = json.loads(
        (DIR / 'cyber_prairie_shortlist.json').read_text(encoding='utf-8')
    )['shortlist']
    store = json.loads((DIR / 'properties.json').read_text(encoding='utf-8'))
    return sl, store


def is_vetted(p, store):
    """Return True when the store entry for ``p['url']`` has full data.

    The entry must carry a truthy ``availability_checked_at``, at least one
    of ``photo_urls`` / ``thumbnail``, an analysis (a non-None
    ``character_score`` or four or more ``criteria``), and a numeric
    ``land_size_m2``. A URL absent from ``store`` is treated as an empty
    entry and therefore not vetted.
    """
    record = store.get(p.get('url'), {})
    if not record.get('availability_checked_at'):
        return False

    photos_present = bool(record.get('photo_urls') or record.get('thumbnail'))
    criteria_count = len(record.get('criteria') or {})
    analysed = criteria_count >= 4 or record.get('character_score') is not None
    land_is_numeric = isinstance(record.get('land_size_m2'), (int, float))
    return all((photos_present, analysed, land_is_numeric))


def ratios(p):
    """Compute value ratios for one property record.

    Args:
        p: mapping that may contain ``price``, ``land_m2``, ``building_m2``
            and ``cp_score``.

    Returns:
        A dict with three keys, each ``None`` when an input it needs is
        missing:
          * ``eur_per_m2_building`` - price / building area, rounded to int
          * ``eur_per_m2_land`` - price / land area, rounded to 1 decimal
          * ``cp_per_100k`` - CP score per 100,000 of price, 2 decimals
    """
    # A missing or zero price/area cannot yield a meaningful ratio; report
    # None instead of a fake 0 that would read as "free".
    price = p.get('price') or None
    land = p.get('land_m2') or None
    bldg = p.get('building_m2') or None
    # A CP score of 0 is a real score; only None means "not scored".
    cp = p.get('cp_score')
    return {
        'eur_per_m2_building': round(price / bldg) if price and bldg else None,
        'eur_per_m2_land': round(price / land, 1) if price and land else None,
        'cp_per_100k': (
            round(cp / (price / 100_000), 2)
            if price and cp is not None else None
        ),
    }


def pareto_frontier(props):
    """Return the members of ``props`` that no other member dominates.

    Lower ``price`` is better and higher ``cp_score`` is better. Records
    whose ``price`` or ``cp_score`` is None are neither returned nor used as
    rivals. Input order is preserved in the result.
    """
    def outranks(rival, price, score):
        # A rival dominates when it is no worse on both axes and strictly
        # better on at least one of them.
        rival_price, rival_score = rival.get('price'), rival.get('cp_score')
        if rival_price is None or rival_score is None:
            return False
        return (
            rival_price <= price
            and rival_score >= score
            and (rival_price < price or rival_score > score)
        )

    survivors = []
    for candidate in props:
        price, score = candidate.get('price'), candidate.get('cp_score')
        if price is None or score is None:
            continue
        beaten = any(
            outranks(rival, price, score)
            for rival in props
            if rival is not candidate
        )
        if not beaten:
            survivors.append(candidate)
    return survivors


def fmt(p):
    """Render one property as a three-line, indented text entry.

    Line 1 shows price, CP score, land area and the land and CP ratios from
    ``ratios``; line 2 the title cut to 55 characters; line 3 the URL.
    A missing or null ``price`` / ``cp_score`` renders as ``n/a`` and a null
    ``title`` / ``url`` as an empty string, so a partially filled record
    cannot raise while being formatted.
    """
    r = ratios(p)
    price = p.get('price')
    cp = p.get('cp_score')
    # Numeric format specs raise TypeError on None, so substitute a
    # placeholder padded to the same column width.
    price_txt = f"{'n/a':>8}" if price is None else f"{price:>8,}"
    cp_txt = f"{'n/a':>4}" if cp is None else f"{cp:>4}"
    return (
        f"  €{price_txt}  CP {cp_txt}  "
        f"land {(p.get('land_m2') or 0):>7,}m²  "
        f"€{r['eur_per_m2_land']}/m²-land  "
        f"CP/€100k {r['cp_per_100k']}\n"
        # `or ''` also covers keys present with a JSON null value, which
        # the .get default alone does not.
        f"            {(p.get('title') or '')[:55]}\n"
        f"            {p.get('url') or ''}"
    )


def main():
    """Command-line entry point: print the frontier and the incomplete list.

    Without flags the pool is the shortlist filtered by ``is_vetted``; with
    ``--all`` it is the whole shortlist. Properties passing the completeness
    gate (truthy price and land_m2, non-None cp_score) are ranked: their
    Pareto frontier is printed, sorted by CP per 100k descending. The rest
    are listed under "NEEDS ENRICHMENT" with the fields they lack.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--all', action='store_true', help='whole shortlist, flag incomplete')
    args = ap.parse_args()

    sl, store = load()
    pool = sl if args.all else [p for p in sl if is_vetted(p, store)]

    def missing_fields(p):
        # Single source of truth for the completeness gate: price and
        # land_m2 must be truthy, while cp_score only has to be present
        # (a score of 0 is valid and must not be reported as missing).
        gaps = [k for k in ('price', 'land_m2') if not p.get(k)]
        if p.get('cp_score') is None:
            gaps.append('cp_score')
        return gaps

    # Partition by the predicate itself (no dict-equality membership test).
    rankable, incomplete = [], []
    for p in pool:
        (incomplete if missing_fields(p) else rankable).append(p)

    frontier = pareto_frontier(rankable)
    frontier.sort(key=lambda p: ratios(p)['cp_per_100k'] or 0, reverse=True)

    print(f"\n{'='*70}")
    print("  DIAMONDS — Pareto frontier (best CP per € spent)")
    print(f"  pool: {len(pool)}  rankable: {len(rankable)}  on frontier: {len(frontier)}")
    print(f"{'='*70}\n")
    for p in frontier:
        print(fmt(p) + "\n")

    if incomplete:
        print(f"{'-'*70}")
        print(f"  NEEDS ENRICHMENT (not rankable — missing data): {len(incomplete)}")
        for p in incomplete:
            # Prefer an explicit 'missing' list supplied with the record.
            miss = p.get('missing') or missing_fields(p)
            # `or ''` guards against keys present with a JSON null value,
            # which incomplete records are the most likely to contain.
            title = (p.get('title') or '')[:45]
            url = (p.get('url') or '')[:45]
            print(f"    {title:45}  missing: {miss}  {url}")


if __name__ == '__main__':
    main()
