#!/usr/bin/env python3
"""Refresh cp_score across the store + regenerate map_data.json from current scores.

The scorer (cyber_prairie_score.compute_cp_score) is pure — it doesn't persist
back into properties.json. After any criteria change (YAML edit, rubric update,
new gates), the on-disk score cache is stale until something writes the refreshed
values. This script is that something:

  1. Scores every active property under the current criteria_loader.CRITERIA
  2. Writes `cp_score`, `cp_raw`, `cp_gated` and `cp_gated_reasons` into properties.json
  3. Regenerates map_data.json from properties.json (schema-compatible with
     the existing map_viewer.html — keeps it working without UI changes)

Usage:
    python3 refresh_scores_and_map.py
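    python3 refresh_scores_and_map.py --top 20   # preview the top 20 instead of the default 10
    python3 refresh_scores_and_map.py --only-complete   # map + preview only new-system-enriched properties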
"""
from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path

# Directory containing this file; used for the import bootstrap and the output path.
SCRIPT_DIR = Path(__file__).resolve().parent
# Put this directory on sys.path (once) before the project-local imports below.
# NOTE(review): presumably cyber_prairie_score.py and store.py live next to
# this file — confirm if the layout changes.
if str(SCRIPT_DIR) not in sys.path:
    sys.path.insert(0, str(SCRIPT_DIR))

# These imports must stay below the sys.path tweak above (hence the E402 waiver).
from cyber_prairie_score import check_tier1_gates, compute_cp_score  # noqa: E402
from store import is_active, load, persist  # noqa: E402

# Output file written by regenerate_map_data().
MAP_DATA = SCRIPT_DIR / 'map_data.json'


def refresh_all_scores(store: dict) -> dict:
    """Score every active property and write the results back onto its record.

    Records are mutated in place. For each active property that scores
    successfully the following keys are (re)written:

      - ``cp_score``: final score, rounded to 2 decimals
      - ``cp_raw``: raw score, rounded to 2 decimals
      - ``cp_gated``: True when check_tier1_gates reported anything
      - ``cp_gated_reasons``: what check_tier1_gates reported, or ``[]``

    and any ``cp_score_error`` left behind by an earlier failed run is removed,
    so a record never carries a fresh score next to a stale error.

    When scoring raises, the record gets ``cp_score_error`` (message truncated
    to 120 characters) and its existing score fields are left untouched.

    Args:
        store: mapping whose values are property records (dicts).

    Returns:
        Tally dict with keys ``scored`` (gate-passing), ``gated``, ``errored``
        and ``inactive`` (skipped without scoring).
    """
    counts = {'scored': 0, 'gated': 0, 'errored': 0, 'inactive': 0}
    for p in store.values():
        if not is_active(p):
            counts['inactive'] += 1
            continue
        try:
            raw, final, scores, _missing = compute_cp_score(p)
            triggered = check_tier1_gates(p, scores, final)
        except Exception as e:  # noqa: BLE001
            # Deliberately best-effort: one bad record must not abort the run.
            p['cp_score_error'] = str(e)[:120]
            counts['errored'] += 1
            continue
        # Scoring succeeded: drop an error recorded by a previous run.
        p.pop('cp_score_error', None)
        p['cp_score'] = round(final, 2)
        p['cp_raw'] = round(raw, 2)
        p['cp_gated'] = bool(triggered)
        p['cp_gated_reasons'] = triggered or []
        if triggered:
            counts['gated'] += 1
        else:
            counts['scored'] += 1
    return counts


def _has_complete_new_system_data(p: dict) -> bool:
    """Report whether at least one new-system pipeline left a signal on ``p``.

    The bar was calibrated on 2026-05-30 against real coverage. Requiring
    photos AND verified land AND an enrichment signal kept only 4 properties,
    since land_size_m2 from the Ext-header harvester exists for just 25 of 392
    active records — far too sparse for a useful map.

    So the test is deliberately permissive: a record qualifies as soon as ANY
    one of these fields carries a real (non-default) value:

      - character_score     set (not None)     -> vision rubric pass ran
      - commune_population  an int or float    -> community_vitality enriched it
      - amenities           non-empty          -> lookup_amenities ran
      - photo_urls          non-empty          -> per-source photo harvester ran

    Records scored only by the legacy GPT path, with no recent enrichment,
    fail all four checks and are excluded.
    """
    # Vision rubric: any value other than None counts, including 0.
    if p.get('character_score') is not None:
        return True
    # Community vitality: must be an actual number, not a string placeholder.
    if isinstance(p.get('commune_population'), (int, float)):
        return True
    # Amenity lookup: an empty container means nothing was populated.
    if p.get('amenities'):
        return True
    # Photo harvester is the last signal checked.
    return bool(p.get('photo_urls'))


def regenerate_map_data(store: dict, only_complete: bool = False) -> int:
    """Rebuild map_data.json from the store and return the number of entries.

    One entry is written per active property whose ``lat`` and ``lon`` are
    both numeric. Gated and ungated properties are both kept; the ``gated``
    and ``gated_reasons`` fields let a future viewer colour-code them. The
    entry layout is kept compatible with the existing map_viewer.html, and
    entries are ordered by score, highest first.

    Args:
        store: mapping of listing URL -> property record.
        only_complete: when true, properties failing
            _has_complete_new_system_data() are left out and the number
            left out is printed.

    Returns:
        How many entries were written to MAP_DATA.
    """
    def _numeric(value) -> bool:
        return isinstance(value, (int, float))

    def _by_score_desc(entry: dict) -> float:
        # Negated so an ascending sort puts the best score first.
        return -float(entry.get('score') or 0)

    entries = []
    dropped = 0
    for listing_url, prop in store.items():
        if not is_active(prop):
            continue
        # Completeness is checked before coordinates, so the dropped tally
        # also counts incomplete properties that have no coordinates.
        if only_complete and not _has_complete_new_system_data(prop):
            dropped += 1
            continue
        latitude = prop.get('lat')
        longitude = prop.get('lon')
        if not _numeric(latitude) or not _numeric(longitude):
            continue
        heading = prop.get('title') or prop.get('city') or '?'
        place = prop.get('city') or prop.get('location') or ''
        blurb = prop.get('title') or ''
        entries.append({
            'url': listing_url,
            'title': heading[:120],
            'score': prop.get('cp_score') or 0,
            'gated': bool(prop.get('cp_gated')),
            'gated_reasons': prop.get('cp_gated_reasons', []),
            'lat': latitude,
            'lon': longitude,
            'location': place,
            'price': prop.get('price'),
            'source': prop.get('source', ''),
            'character_score': prop.get('character_score'),
            'character_tags': prop.get('character_tags', ''),
            'summary': blurb[:200],
            'analysis': prop.get('analysis', '') or '',
        })

    entries.sort(key=_by_score_desc)
    with MAP_DATA.open('w', encoding='utf-8') as handle:
        json.dump(entries, handle, indent=2, ensure_ascii=False)

    if only_complete:
        print(f'  (excluded {dropped} active properties lacking new-system data)')
    return len(entries)


def main():
    """Run the full refresh: rescore, persist the store, rebuild the map, preview.

    Command-line flags:
        --top N          how many gate-passing properties to preview (default 10)
        --only-complete  restrict the map and the preview to properties for
                         which _has_complete_new_system_data() is true

    The preview lists only properties that are active, not gated, and have
    numeric ``lat``/``lon`` — i.e. gate-passing properties that actually
    appear on the regenerated map.
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring contains a numbered list and a usage block;
        # the default formatter would reflow them into a single paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    ap.add_argument('--top', type=int, default=10,
                    help='how many top-scoring properties to preview (default: 10)')
    ap.add_argument('--only-complete', action='store_true',
                    help='restrict map to properties with complete new-system data')
    args = ap.parse_args()

    print('Loading store...')
    store = load()
    print(f'  {len(store)} total properties')

    print('Scoring all active properties...')
    counts = refresh_all_scores(store)
    print(f"  scored (gate-pass): {counts['scored']}")
    print(f"  gated:              {counts['gated']}")
    print(f"  errored:            {counts['errored']}")
    print(f"  inactive (skipped): {counts['inactive']}")

    print('Persisting properties.json + enriched_data.json...')
    persist(store)

    print(f'Regenerating {MAP_DATA.name}'
          + (' (only-complete filter)' if args.only_complete else '')
          + '...')
    n = regenerate_map_data(store, only_complete=args.only_complete)
    print(f'  {n} properties on map')

    def _on_map(p: dict) -> bool:
        # Same coordinate test regenerate_map_data applies before emitting an entry.
        return (isinstance(p.get('lat'), (int, float))
                and isinstance(p.get('lon'), (int, float)))

    # Top N preview — restricted to the gate-passing part of the map cohort.
    # The store key is kept alongside each record as a fallback URL.
    candidates = [
        (url, p) for url, p in store.items()
        if is_active(p)
        and not p.get('cp_gated')
        and _on_map(p)
        and (not args.only_complete or _has_complete_new_system_data(p))
    ]
    candidates.sort(key=lambda item: -float(item[1].get('cp_score') or 0))
    # Clamp so a negative --top yields an empty preview instead of slicing
    # from the end of the list.
    top = candidates[:max(args.top, 0)]

    print(f'\n=== Top {args.top} (gate-passing, on map) ===')
    for i, (url, p) in enumerate(top, 1):
        sc = p.get('cp_score')
        # A property whose scoring errored may have no cp_score; formatting
        # None with a width spec would raise TypeError.
        sc_s = f'{sc:>4}' if isinstance(sc, (int, float)) else '?'.rjust(4)
        c = p.get('city') or p.get('location') or '?'
        d = p.get('department') or p.get('search_region') or '?'
        pr = p.get('price') or 0
        # The thousands separator is only valid for numbers; print anything
        # else (e.g. a price stored as text) verbatim.
        pr_s = f'{pr:,}' if isinstance(pr, (int, float)) else str(pr)
        src = p.get('source', '?')
        src = '?' if src is None else str(src)
        ch = p.get('character_score')
        ch_s = f'char {ch}/5' if ch is not None else 'char ?'
        link = str(p.get('url') or url)
        print(f'{i:>2}. {sc_s} [{src[:12]:12}] {c} ({d}) eur{pr_s} {ch_s}')
        print(f'     {link[:80]}')


# Run the refresh only when executed as a script, not when imported.
if __name__ == '__main__':
    main()