#!/usr/bin/env python3
"""
Diagnose discrepancy between Properstar favorites and FarmMatch app
"""

import pandas as pd
import json

# Report banner: title, an 80-column rule, then one blank line.
print("🔍 FARMMATCH DISCREPANCY DIAGNOSIS", "=" * 80, "", sep="\n")

# Load data sources.
# The two CSVs and enriched_data.json are required: a missing or unreadable
# file raises and aborts the script.
df_extracted = pd.read_csv('extracted_property_urls.csv')
df_analysis = pd.read_csv('analysis_output.csv')

# Read JSON as UTF-8 explicitly so parsing does not depend on the platform's
# default locale encoding.
with open('enriched_data.json', 'r', encoding='utf-8') as f:
    map_data = json.load(f)

# removed_properties.json is optional; an absent file means "nothing removed".
# Only I/O and parse errors are caught, so KeyboardInterrupt / SystemExit and
# genuine programming errors are no longer silently swallowed.
try:
    with open('removed_properties.json', 'r', encoding='utf-8') as f:
        removed = json.load(f)
except FileNotFoundError:
    removed = []
except (OSError, ValueError) as exc:
    # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
    # Keep the best-effort fallback, but say so: otherwise a corrupt file is
    # indistinguishable from "no removals" in the counts below.
    print(f"⚠️  Could not read removed_properties.json ({exc}); treating it as empty")
    removed = []

# URLs from each source.
# Blank cells in a CSV load as NaN; drop them so they are neither counted as
# properties nor carried into the set differences below (NaN never equals
# itself, so a later row lookup by URL would find nothing for it).
extracted_urls = set(df_extracted['URL'].dropna())
analysis_urls = set(df_analysis['URL'].dropna())
map_urls = {p['url'] for p in map_data}
# Entries without a URL are skipped rather than collapsed into a single ''
# member, which would inflate the "Manually Removed" count by one.
removed_urls = {p['url'] for p in removed if p.get('url')}

# Calculate discrepancies
new_in_favorites = extracted_urls - analysis_urls   # scraped, not yet analyzed
missing_from_map = analysis_urls - map_urls         # analyzed, absent from map data

# Summary: size of each URL set, then the size of each discrepancy set.
source_counts = (
    ("Properstar Favorites (extracted_property_urls.csv)", extracted_urls),
    ("Analysis Database (analysis_output.csv)", analysis_urls),
    ("Map Viewer (enriched_data.json)", map_urls),
    ("Manually Removed (removed_properties.json)", removed_urls),
)
discrepancy_counts = (
    ("New in favorites (not analyzed yet)", new_in_favorites),
    ("Missing from map viewer", missing_from_map),
)

print("📊 COUNTS:")
for label, urls in source_counts:
    print(f"   {label}: {len(urls)}")
print()

print("🔍 DISCREPANCIES:")
for label, urls in discrepancy_counts:
    print(f"   {label}: {len(urls)}")
print()

# Detail section: favorites that are not yet present in the analysis CSV.
if new_in_favorites:
    print(f"{'='*80}")
    print(f"🆕 {len(new_in_favorites)} NEW PROPERTIES IN FAVORITES (Not yet analyzed)")
    print(f"{'='*80}")
    print()
    print("These properties were scraped from Properstar but haven't been")
    print("analyzed by GPT yet, so they're not in the FarmMatch app.")
    print()
    print("Sample of new properties:")
    # Set iteration order is arbitrary, so sort to show the same sample on
    # every run. key=str keeps the sort safe if a non-string value (e.g. NaN
    # from a blank CSV cell) is present in the set.
    sample_urls = sorted(new_in_favorites, key=str)[:10]
    for i, url in enumerate(sample_urls, start=1):
        matches = df_extracted[df_extracted['URL'] == url]
        # No matching row (a NaN URL never compares equal) must not crash the
        # report; fall back to the placeholder title instead.
        title = 'No title' if matches.empty else matches.iloc[0].get('Locatie', 'No title')
        if pd.isna(title):
            # Column present but cell blank: avoid printing "nan".
            title = 'No title'
        print(f"   {i}. {title}")
        print(f"      {url}")

    if len(new_in_favorites) > 10:
        print(f"   ... and {len(new_in_favorites) - 10} more")
    print()

# Warning section: URLs present in the analysis CSV but absent from the map JSON.
# NOTE(review): removed_urls is not subtracted here; if manually removed
# properties are expected to be absent from the map, they are still counted
# as missing — confirm the intended behavior.
if missing_from_map:
    rule = "=" * 80
    warning_lines = (
        rule,
        f"⚠️  {len(missing_from_map)} PROPERTIES IN ANALYSIS BUT NOT IN MAP",
        rule,
        "",
        "These properties are in analysis_output.csv but not enriched_data.json.",
        "This shouldn't happen - parse_criteria.py should sync them.",
        "",
    )
    for line in warning_lines:
        print(line)

# Recommendation section: tell the user what to run, based on what was found.
print("=" * 80)
print("💡 SOLUTION")
print("=" * 80)
print()

if new_in_favorites:
    # The full pipeline ends with parse_criteria.py, so it also covers any
    # analyzed properties that are missing from the map data.
    print("✅ Run the full update pipeline to analyze new properties:")
    print()
    print("   Option 1 (Recommended): Run full update")
    print("   ./full_update.sh")
    print()
    print("   Option 2 (Manual): Run individual steps")
    print("   1. python3 analyze_from_urls_optimized.py")
    print("   2. python3 parse_criteria.py")
    print()
    print(f"   This will analyze the {len(new_in_favorites)} new properties and add them to the app.")
elif missing_from_map:
    # Everything is analyzed, but some analyzed properties are absent from
    # the map data. Reporting "no action needed" here would contradict the
    # warning above, which names parse_criteria.py as the sync step.
    print(f"⚠️  Re-run the map sync to add the {len(missing_from_map)} analyzed properties missing from the map viewer:")
    print()
    print("   python3 parse_criteria.py")
else:
    print("✅ No action needed - all favorites are analyzed and in the app!")

print()
