#!/usr/bin/env python3
"""Vision-based character scorer — runs on the Claude Code CLI (Max subscription),
NO external paid API.

Downloads each property's lead photo locally, then asks `claude -p` to read it and
rate architectural character (1-5). Replaced the old gpt-4o-mini/OpenAI path 2026-06-08
so the whole pipeline runs on the flat-rate Claude subscription — no OPENAI_API_KEY,
no metered calls.

Per-property `claude -p` invocation is slower than a batched HTTP API but cost is flat;
fine for incremental weekly deltas. Use --limit for a bounded run.

Usage:
    python3 character_score_vision.py            # score all active props w/ photo, no score
    python3 character_score_vision.py --url URL  # one property
    python3 character_score_vision.py --dry-run
    python3 character_score_vision.py --limit 5
    python3 character_score_vision.py --rescore  # also re-score ones already scored (e.g. drop old OpenAI scores)
"""
import argparse
import json
import os
import re
import subprocess
import sys
import tempfile

import requests

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from store import load, persist, upsert, is_active

# Browser-like User-Agent sent as a header when downloading the lead photo.
UA = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 '
      '(KHTML, like Gecko) Chrome/120.0 Safari/537.36')

# Prompt template passed to `claude -p`. `{path}` is filled in via str.format
# with the local path of the downloaded photo; the doubled braces in the last
# line are str.format escapes that render as literal `{` / `}` around the
# example JSON reply.
PROMPT = (
    "Read the image file at {path}. It is the lead photo of a rural property for sale. "
    "Rate its architectural character on a 1-5 scale: "
    "5 = exceptional (stone farmhouse, historic mill, period features — beams, fireplace, photogenic); "
    "4 = strong (genuine character, some period features or rural charm); "
    "3 = adequate (decent rural/country property, no strong personality); "
    "2 = weak (generic or tired, little architectural interest); "
    "1 = poor (bland modern box, mobile home, or derelict beyond recovery). "
    'Reply with ONLY one line of JSON and nothing else: {{"score": N, "tags": "2-5 word feature description"}}'
)


def get_lead_photo(prop):
    """Return the URL of the property's lead photo, or None if it has none.

    Sources are tried in priority order: the first entry of 'photo_urls',
    then 'thumbnail', then the first entry of 'cover_photos'. Missing or
    empty values fall through to the next source.
    """
    gallery = prop.get('photo_urls')
    if gallery:
        return gallery[0]

    thumb = prop.get('thumbnail')
    if thumb:
        return thumb

    covers = prop.get('cover_photos')
    if covers:
        return covers[0]
    return None


def _extract_reply_json(text):
    """Return the first JSON object in *text* that has a 'score' key, or None.

    Decoding is attempted from each '{' in turn and the JSON decoder decides
    where the object ends, so surrounding prose, code fences, or a '}' inside
    a string value do not truncate the match.
    """
    decoder = json.JSONDecoder()
    start = text.find('{')
    while start != -1:
        try:
            obj, _ = decoder.raw_decode(text[start:])
        except json.JSONDecodeError:
            obj = None
        if isinstance(obj, dict) and 'score' in obj:
            return obj
        start = text.find('{', start + 1)
    return None


def score_property(prop):
    """Score one property's lead photo with `claude -p`.

    Downloads the lead photo to a temporary file beside this script, asks the
    Claude CLI to rate it using PROMPT, and parses the JSON reply.

    Args:
        prop: property record (dict) carrying the photo fields read by
            get_lead_photo.

    Returns:
        A ``(result, error)`` pair. On success ``result`` is
        ``{'character_score': int in 1..5, 'character_tags': ...}`` and
        ``error`` is None. On any failure ``result`` is None and ``error`` is
        a short human-readable reason. This function does not raise for
        ordinary failures (network, CLI, parsing); they are reported through
        ``error``.
    """
    url = get_lead_photo(prop)
    if not url:
        return None, 'no photo'
    script_dir = os.path.dirname(os.path.abspath(__file__))
    tmp = None
    try:
        r = requests.get(url, headers={'User-Agent': UA}, timeout=20)
        if r.status_code != 200 or not r.content:
            return None, f'photo HTTP {r.status_code}'
        # Write into the script dir, NOT $TMPDIR — `claude -p` sandboxes its Read tool
        # to the working dir, so /var/folders temp files are unreadable (returns prose,
        # not JSON).
        with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False,
                                         dir=script_dir) as f:
            # Record the name before writing so a failed write is still cleaned up.
            tmp = f.name
            f.write(r.content)
        # Pin the CLI's working dir to the script dir so the temp file is inside
        # its sandbox no matter where this script was launched from.
        proc = subprocess.run(
            ['claude', '-p', PROMPT.format(path=tmp)],
            capture_output=True, text=True, timeout=180, cwd=script_dir,
        )
        out = (proc.stdout or '').strip()
        data = _extract_reply_json(out)
        if data is None:
            if proc.returncode != 0:
                # Surface the CLI failure instead of a misleading "no json".
                detail = (proc.stderr or '').strip() or out
                return None, f'claude -p exit {proc.returncode}: {detail[:60]!r}'
            return None, f'no json in: {out[:60]!r}'
        score = int(data['score'])
        if not 1 <= score <= 5:
            return None, f'score out of range: {score}'
        return {'character_score': score, 'character_tags': data.get('tags', '')}, None
    except subprocess.TimeoutExpired:
        return None, 'claude -p timeout'
    except Exception as e:
        # Deliberate best-effort boundary: one bad property must not abort the batch.
        return None, f'error: {e}'
    finally:
        if tmp and os.path.exists(tmp):
            os.unlink(tmp)


def main():
    """CLI entry point: pick candidate properties, score them, persist results.

    Flags:
        --url      score only the property stored under this URL
        --dry-run  list the candidates and exit without scoring
        --limit    cap the number of candidates (0 means no cap)
        --rescore  include properties that already have a character_score

    The store is persisted after every 5 successful scores, once more at the
    end if anything is still unsaved, and before propagating a Ctrl-C so an
    interrupted run keeps the work already done.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--url', help='Score a specific property URL')
    parser.add_argument('--dry-run', action='store_true')
    parser.add_argument('--limit', type=int, default=0)
    parser.add_argument('--rescore', action='store_true',
                        help='also re-score properties that already have a character_score')
    args = parser.parse_args()
    # A negative limit would silently slice from the end (candidates[:-N]).
    if args.limit < 0:
        parser.error('--limit must be >= 0')

    store = load()

    if args.url:
        candidates = [store[args.url]] if args.url in store else []
    else:
        candidates = [
            p for p in store.values()
            if is_active(p)
            and (args.rescore or p.get('character_score') is None)
            and get_lead_photo(p) is not None
        ]
    if args.limit:
        candidates = candidates[:args.limit]

    print(f"Properties to score (via claude -p, subscription): {len(candidates)}")
    if args.dry_run:
        for p in candidates:
            print(f"  {p.get('url', '')[-60:]}")
        return

    ok = err = 0
    unsaved = 0  # successful scores upserted since the last persist()
    try:
        for i, p in enumerate(candidates, 1):
            url = p.get('url', '')
            result, error = score_property(p)
            if result:
                upsert(store, url, result)
                unsaved += 1
                print(f"  [{i}/{len(candidates)}] ✓ {url[-46:]}  score={result['character_score']}  {result['character_tags']}")
                ok += 1
                # Checkpoint every 5 successes; each call can take minutes.
                if unsaved >= 5:
                    persist(store)
                    unsaved = 0
            else:
                print(f"  [{i}/{len(candidates)}] ✗ {url[-46:]}  {error}")
                err += 1
    except KeyboardInterrupt:
        # Keep what was scored since the last checkpoint, then let the
        # interrupt propagate as usual.
        if unsaved:
            persist(store)
        raise

    if unsaved:
        persist(store)
    print(f"\nDone: {ok} scored, {err} errors")


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
