#!/usr/bin/env python3
"""
Paradisomatch Quality Report — Data completeness, score integrity, outcome metrics.

Three layers:
  1. Data Completeness Matrix — per-source field coverage
  2. Score Integrity Checks — hallucinated/blocked analysis, uniform scores
  3. Pipeline Outcome Metrics — score distribution, top-10 confidence

Usage:
    python3 quality_report.py                # Terminal summary (default)
    python3 quality_report.py --detail       # Per-property flags
    python3 quality_report.py --json         # Machine-readable output
"""
import argparse
import json
from collections import Counter
from pathlib import Path

from store import load, is_active, detect_source, get_score, short_url, STATUS_REMOVED

# Fields tallied per source by completeness_matrix(). All six are counted,
# but the terminal table in print_summary() prints only the first five
# (title is counted and not displayed).
COMPLETENESS_FIELDS = ['price', 'land_size_m2', 'lat', 'bedrooms', 'criteria', 'title']
# Fields counted by assess_confidence() and print_detail() to produce the
# "data=N/5" figure (KPI-focused — includes building_size_m2).
CONFIDENCE_FIELDS = ['price', 'land_size_m2', 'lat', 'bedrooms', 'building_size_m2']


def prop_source(prop):
    """Return the display label for the source a property came from.

    The source is derived from the property's URL via ``detect_source``.
    An ``'unknown'`` result is shown as ``'Other'``; any other result is
    title-cased.
    """
    source = detect_source(prop.get('url', ''))
    if source == 'unknown':
        return 'Other'
    return source.title()


def has_field(prop, field):
    """Tell whether ``prop[field]`` holds a usable (non-empty) value.

    A missing key, ``None``, a blank or whitespace-only string and an empty
    dict all count as absent. Every other value (including ``0`` and empty
    lists) counts as present.
    """
    value = prop.get(field)
    if value is None:
        return False
    blank_string = isinstance(value, str) and value.strip() == ''
    empty_dict = isinstance(value, dict) and len(value) == 0
    return not (blank_string or empty_dict)


def is_analysis_ok(prop):
    """Return True when the stored GPT analysis looks like real content.

    The analysis is rejected when the page title contains one of the
    bot-challenge phrases checked below, or when the analysis text is
    shorter than 50 characters (missing values count as empty).
    """
    page_title = (prop.get('title') or '').lower()
    analysis_text = prop.get('analysis') or ''
    challenge_markers = ('just a moment', 'checking your browser')
    if any(marker in page_title for marker in challenge_markers):
        return False
    return len(analysis_text) >= 50


# ─── Layer 1: Completeness ───

def completeness_matrix(props):
    """Aggregate field coverage per listing source.

    Returns a dict keyed by the source label from ``prop_source``. Each
    entry holds ``count`` (properties seen), ``fields`` (per-field number of
    properties with a non-empty value, for every name in
    ``COMPLETENESS_FIELDS``) and ``analysis_ok`` (properties whose analysis
    passes ``is_analysis_ok``).
    """
    matrix = {}
    for prop in props:
        entry = matrix.setdefault(
            prop_source(prop),
            {
                'count': 0,
                'fields': dict.fromkeys(COMPLETENESS_FIELDS, 0),
                'analysis_ok': 0,
            },
        )
        entry['count'] += 1
        coverage = entry['fields']
        for name in COMPLETENESS_FIELDS:
            if has_field(prop, name):
                coverage[name] += 1
        if is_analysis_ok(prop):
            entry['analysis_ok'] += 1
    return matrix


# ─── Layer 2: Score Integrity ───

def detect_blocked_analysis(props):
    """Collect URLs of properties whose analysis cannot be trusted.

    A property is listed when ``is_analysis_ok`` rejects it, or when it
    carries a positive ``overall_score`` without any ``criteria`` breakdown.
    """
    blocked_urls = []
    for prop in props:
        if is_analysis_ok(prop):
            # The analysis text looks real: only flag a positive score that
            # has no criteria behind it.
            if prop.get('criteria'):
                continue
            if not ((prop.get('overall_score') or 0) > 0):
                continue
        blocked_urls.append(prop['url'])
    return blocked_urls


def detect_uniform_scores(props, *, min_count=5, sample_size=5):
    """Find numeric criteria patterns shared by many properties.

    Identical criteria scores repeated across many listings are a hint that
    the scores were not derived from the individual pages.

    Args:
        props: iterable of property dicts; each may carry a ``criteria``
            dict and must carry ``url`` when it has numeric criteria.
        min_count: minimum number of properties sharing a pattern for it to
            be reported (default 5, the previously hard-coded threshold).
        sample_size: maximum number of URLs listed per pattern (default 5).

    Returns:
        Dict keyed by ``str(dict(pattern))`` with ``{'count', 'urls'}``
        values, in first-seen order of the patterns.
    """
    urls_by_pattern = {}
    for prop in props:
        criteria = prop.get('criteria') or {}
        # Only numeric values take part in the fingerprint; sorting makes it
        # independent of the criteria dict's key order.
        pattern = tuple(sorted(
            (key, value)
            for key, value in criteria.items()
            if isinstance(value, (int, float))
        ))
        if pattern:
            urls_by_pattern.setdefault(pattern, []).append(prop['url'])

    # The number of URLs collected for a pattern is its occurrence count, so
    # no separate counter is needed.
    return {
        str(dict(pattern)): {'count': len(urls), 'urls': urls[:sample_size]}
        for pattern, urls in urls_by_pattern.items()
        if len(urls) >= min_count
    }


def detect_scores_without_data(props):
    """Collect URLs of properties scored without any supporting facts.

    A property is flagged when its score is positive while ``price``,
    ``land_size_m2`` and ``bedrooms`` are all empty according to
    ``has_field``.

    Args:
        props: iterable of property dicts; flagged ones must carry ``url``.

    Returns:
        List of flagged URLs, in input order.
    """
    evidence_fields = ('price', 'land_size_m2', 'bedrooms')
    flagged = []
    for prop in props:
        # NOTE(review): get_score lives in the store module. Other call
        # sites in this file treat scores as possibly falsy, so a None for
        # unscored properties is plausible; coerce it to 0 so the comparison
        # below cannot raise TypeError.
        score = get_score(prop) or 0
        if score > 0 and not any(has_field(prop, name) for name in evidence_fields):
            flagged.append(prop['url'])
    return flagged


# ─── Layer 3: Outcome Metrics ───

def score_histogram(scored_props):
    """Count properties per score bucket.

    Buckets are ``'4.0+'``, ``'3.0-4.0'``, ``'2.0-3.0'`` and ``'<2.0'``;
    each lower bound is inclusive and anything below 2.0 lands in the last
    bucket.
    """
    # Lower bounds, checked from highest to lowest; the first match wins.
    floors = ((4.0, '4.0+'), (3.0, '3.0-4.0'), (2.0, '2.0-3.0'))
    histogram = {'4.0+': 0, '3.0-4.0': 0, '2.0-3.0': 0, '<2.0': 0}
    for prop in scored_props:
        value = get_score(prop)
        label = next((name for floor, name in floors if value >= floor), '<2.0')
        histogram[label] += 1
    return histogram


def assess_confidence(prop, blocked_urls):
    """Rate how much a property's score can be trusted.

    Returns a ``(level, populated, flags)`` tuple where ``populated`` is the
    number of ``CONFIDENCE_FIELDS`` with a non-empty value and ``flags``
    lists the issues found. ``level`` is ``'high'`` with no flags, ``'low'``
    when the analysis is blocked or no confidence field is populated, and
    ``'medium'`` otherwise.
    """
    populated = len([name for name in CONFIDENCE_FIELDS if has_field(prop, name)])
    checks = (
        ('thin_data', populated <= 1),
        ('blocked_analysis', prop.get('url') in blocked_urls),
        ('no_price', not prop.get('price')),
        ('no_coordinates', not prop.get('lat')),
    )
    issues = [label for label, failed in checks if failed]

    if not issues:
        return 'high', populated, []
    level = 'low' if ('blocked_analysis' in issues or populated == 0) else 'medium'
    return level, populated, issues


# ─── Report Generation ───

def generate_report(store):
    """Build the full quality report for a property store.

    Only active properties (per ``is_active``) feed the three layers. The
    result is a dict with the overall counts, the completeness matrix, the
    integrity findings, the score histogram and the ten best-scored
    properties, each paired with its confidence assessment.
    """
    all_props = list(store.values())
    active_props = [prop for prop in all_props if is_active(prop)]

    # Layer 1: completeness
    completeness = completeness_matrix(active_props)

    # Layer 2: integrity
    blocked_urls = detect_blocked_analysis(active_props)
    uniform_patterns = detect_uniform_scores(active_props)
    unverifiable_urls = detect_scores_without_data(active_props)

    # Layer 3: outcomes (only properties with a truthy score)
    scored_props = [prop for prop in active_props if get_score(prop)]
    histogram = score_histogram(scored_props)

    def descending_score(prop):
        # Negated so the default ascending sort puts the best score first.
        return -get_score(prop)

    blocked_lookup = set(blocked_urls)
    best_ten = sorted(scored_props, key=descending_score)[:10]
    top10 = [(prop, *assess_confidence(prop, blocked_lookup)) for prop in best_ten]

    return {
        'total': len(all_props),
        'active': len(active_props),
        'removed': len(all_props) - len(active_props),
        'completeness': completeness,
        'blocked_count': len(blocked_urls),
        'blocked_urls': blocked_urls,
        'uniform_patterns': uniform_patterns,
        'unverifiable_count': len(unverifiable_urls),
        'unverifiable_urls': unverifiable_urls,
        'scored_count': len(scored_props),
        'histogram': histogram,
        'top10': top10,
    }


def print_summary(report):
    """Print the terminal-friendly summary of a quality report.

    Sections, in order: overall counts, per-source completeness table,
    integrity warnings, score histogram, top-10 confidence list and, when
    applicable, recommended actions.

    Args:
        report: dict as returned by ``generate_report``.
    """
    rule = f"  {'─' * 60}"
    banner = f"  {'=' * 60}"

    def pct_cell(part, whole):
        # Always 6 characters wide so cells line up with the {:>6} headers.
        return f"{part * 100 // whole:>5}%" if whole else f"{'—':>6}"

    def table_row(label, entry):
        # Same spacing as the header line: one space between 6-wide cells.
        # Only the first five completeness fields have a header column.
        n = entry['count']
        cells = [pct_cell(entry['fields'][f], n) for f in COMPLETENESS_FIELDS[:5]]
        cells.append(pct_cell(entry['analysis_ok'], n))
        return f"  {label:<18} {n:>4}  {' '.join(cells)}"

    print()
    print("  QUALITY REPORT")
    print(banner)
    print(f"  Properties:  {report['total']} total | {report['active']} active | {report['removed']} removed")
    print(f"  Scored:      {report['scored_count']}")
    print()

    # Layer 1: Completeness Matrix
    print("  DATA COMPLETENESS (active properties)")
    print(rule)
    print(f"  {'Source':<18} {'#':>4}  {'price':>6} {'land':>6} {'lat':>6} {'beds':>6} {'crit':>6} {'GPT':>6}")
    print(rule)

    totals = {'count': 0, 'fields': dict.fromkeys(COMPLETENESS_FIELDS, 0), 'analysis_ok': 0}
    for src in sorted(report['completeness']):
        data = report['completeness'][src]
        totals['count'] += data['count']
        totals['analysis_ok'] += data['analysis_ok']
        for f in COMPLETENESS_FIELDS:
            totals['fields'][f] += data['fields'][f]
        print(table_row(src, data))

    if totals['count']:
        print(rule)
        print(table_row('Total', totals))
    print()

    # Layer 2: Integrity
    print("  SCORE INTEGRITY")
    print(rule)
    if report['blocked_count']:
        print(f"  !! {report['blocked_count']} properties with blocked/hallucinated analysis")
    if report['uniform_patterns']:
        total_uniform = sum(v['count'] for v in report['uniform_patterns'].values())
        print(f"  !! {total_uniform} properties share {len(report['uniform_patterns'])} repeated score patterns")
    if report['unverifiable_count']:
        print(f"  !! {report['unverifiable_count']} properties scored without supporting data")
    if not report['blocked_count'] and not report['uniform_patterns'] and not report['unverifiable_count']:
        print("  OK No integrity issues detected")
    print()

    # Layer 3: Outcomes
    print("  SCORE DISTRIBUTION")
    print(rule)
    histogram = report['histogram']
    # Scale bars to the fullest bucket; the floor of 1 avoids dividing by 0.
    peak = max(max(histogram.values(), default=0), 1)
    for bucket, count in histogram.items():
        bar = '#' * (count * 30 // peak)
        print(f"  {bucket:>8}  {bar:<30}  {count}")
    print()

    # Top 10 Confidence
    print("  TOP 10 CONFIDENCE")
    print(rule)
    icons = {'high': 'OK', 'low': '!!'}
    field_total = len(CONFIDENCE_FIELDS)
    for rank, (prop, conf, dc, flags) in enumerate(report['top10'], start=1):
        score = get_score(prop)
        url_short = short_url(prop['url'], 35)
        icon = icons.get(conf, '??')
        flag_str = f"  ({', '.join(flags)})" if flags else ''
        print(f"  {icon} #{rank:>2}  {score:.1f}  data={dc}/{field_total}  {url_short}{flag_str}")
    print()

    # Actions
    actions = []
    if report['blocked_count']:
        actions.append(f"Re-analyze {report['blocked_count']} blocked properties after enrich_leggett scrapes real content")
    no_price = sum(1 for _, _, _, flags in report['top10'] if 'no_price' in flags)
    if no_price:
        actions.append(f"{no_price} top-10 properties missing price — manual check recommended")
    low_conf = sum(1 for _, conf, _, _ in report['top10'] if conf == 'low')
    if low_conf:
        actions.append(f"{low_conf} top-10 properties have LOW confidence — verify before presenting")

    if actions:
        print("  RECOMMENDED ACTIONS")
        print(rule)
        for number, action in enumerate(actions, start=1):
            print(f"  {number}. {action}")
        print()

    print(banner)


def print_detail(report, store):
    """Print one line of flags for every active property that has any.

    Properties are visited in sorted store-key (URL) order; inactive ones
    and those without flags are skipped.
    """
    blocked = set(report['blocked_urls'])
    unverifiable = set(report['unverifiable_urls'])

    print("\n  PROPERTY DETAIL FLAGS")
    print(f"  {'=' * 60}")

    for url, prop in sorted(store.items()):
        if not is_active(prop):
            continue
        populated = sum(1 for name in CONFIDENCE_FIELDS if has_field(prop, name))
        # Label/condition pairs, listed in the order the labels are printed.
        checks = (
            ('BLOCKED', url in blocked),
            ('UNVERIFIABLE', url in unverifiable),
            ('no_price', not prop.get('price')),
            ('no_coords', not prop.get('lat')),
            (f'data={populated}/5', populated <= 1),
        )
        labels = [label for label, hit in checks if hit]
        if not labels:
            continue
        print(f"  {', '.join(labels):30s}  {short_url(url, 50)}")


def main():
    """Command-line entry point: load the store and print the report.

    ``--json`` prints a machine-readable report and takes precedence over
    ``--detail``, which appends the per-property flags to the summary.
    """
    parser = argparse.ArgumentParser(description='Paradisomatch Quality Report')
    parser.add_argument('--detail', action='store_true', help='Show per-property flags')
    parser.add_argument('--json', action='store_true', help='Machine-readable JSON output')
    options = parser.parse_args()

    store = load()
    if not store:
        print("No properties in store. Run migrate.py first.")
        return

    report = generate_report(store)

    if not options.json:
        print_summary(report)
        if options.detail:
            print_detail(report, store)
        return

    # JSON mode: replace the full property objects in top10 with a compact
    # record and leave the raw URL lists out of the payload.
    payload = dict(report)
    payload['top10'] = [
        {
            'url': prop['url'],
            'score': get_score(prop),
            'confidence': conf,
            'data_completeness': dc,
            'flags': flags,
        }
        for prop, conf, dc, flags in report['top10']
    ]
    payload.pop('blocked_urls')
    payload.pop('unverifiable_urls')
    print(json.dumps(payload, indent=2))


# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()