"""
Geocode properties by the city/region name in the Locatie field.

This provides approximate coordinates for map display.
"""
import pandas as pd
import requests
import time

def geocode_city(location_name):
    """Geocode a city/region name using Nominatim.

    Sends one search request to the public Nominatim endpoint and returns the
    coordinates of the first result. Progress and failures are printed; no
    exception from the lookup is propagated to the caller.

    Args:
        location_name: Free-text place name. ``None``, NaN, empty and
            whitespace-only values are rejected without a network call.

    Returns:
        A ``(lat, lon)`` tuple of floats, or ``(None, None)`` when the input
        is unusable, nothing was found, or the lookup failed.
    """
    if not location_name or pd.isna(location_name):
        return None, None

    # Normalise before the ``try`` so the error message in the handler can
    # always refer to ``location``.
    location = str(location_name).strip()
    if not location:
        # Nothing left to search for; skip the request entirely.
        return None, None

    url = "https://nominatim.openstreetmap.org/search"
    params = {
        'q': location,
        'format': 'json',
        'limit': 1,
        'addressdetails': 1,
    }
    headers = {'User-Agent': 'FarmMatch/1.0'}

    try:
        response = requests.get(url, params=params, headers=headers, timeout=10)
        # Report HTTP-level failures as such instead of letting an error page
        # show up later as a confusing JSON/key error.
        response.raise_for_status()
        data = response.json()

        if not data:
            print(f"  ✗ {location}: Not found")
            return None, None

        lat = float(data[0]['lat'])
        lon = float(data[0]['lon'])
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
        # RequestException: network/HTTP problems; ValueError: invalid JSON or
        # non-numeric coordinates; the rest: unexpected response shape.
        print(f"  ✗ {location}: Error - {e}")
        return None, None

    print(f"  ✓ {location}: {lat}, {lon}")
    return lat, lon

def _attach_locatie(df, price_df):
    """Return ``df`` with ``Locatie`` filled from ``price_df``, one row per input row.

    ``price_df`` values take precedence; an existing ``Locatie`` in ``df`` is
    kept only where ``price_df`` has no value for that URL.
    """
    key_col = '_geocode_property_url'
    value_col = '_geocode_locatie'

    lookup = (
        price_df[['Property URL', 'Locatie']]
        # Rows without a Locatie contribute nothing, so drop them first; that
        # way a later duplicate with a real value is not shadowed.
        .dropna(subset=['Locatie'])
        # One row per URL: a left merge against repeated keys would otherwise
        # duplicate property rows, and the duplicates would be saved.
        .drop_duplicates(subset='Property URL')
        # Private names avoid suffix collisions with columns already in df.
        .rename(columns={'Property URL': key_col, 'Locatie': value_col})
    )

    merged = df.merge(lookup, left_on='URL', right_on=key_col, how='left')

    if 'Locatie' in merged.columns:
        merged['Locatie'] = merged[value_col].fillna(merged['Locatie'])
    else:
        merged['Locatie'] = merged[value_col]

    # Always remove the helper columns so they never leak into the saved CSV.
    return merged.drop(columns=[key_col, value_col])


def main():
    """Fill in missing coordinates in ``analysis_output.csv`` by city name.

    Reads ``analysis_output.csv`` and ``extracted_property_urls.csv``, joins
    them on ``URL`` / ``Property URL`` to obtain ``Locatie``, and geocodes
    every row lacking ``Latitude`` or ``Longitude`` via ``geocode_city``.
    Results are written back to ``analysis_output.csv`` after every tenth
    successful lookup and once more at the end.
    """
    output_path = 'analysis_output.csv'

    print("🌍 Geocoding properties by city/region name...")
    print()

    # Load data
    df = pd.read_csv(output_path)
    price_df = pd.read_csv('extracted_property_urls.csv')

    # Merge to get Locatie
    df = _attach_locatie(df, price_df)

    # A file that has never been geocoded may lack the coordinate columns.
    for column in ('Latitude', 'Longitude'):
        if column not in df.columns:
            df[column] = float('nan')

    # These columns receive strings below; if they were loaded as all-NaN
    # float columns, make them object dtype first to avoid dtype conflicts.
    for column in ('ExtractedLocation', 'LocationSource'):
        if column in df.columns:
            df[column] = df[column].astype(object)

    total = len(df)
    print(f"Total properties: {total}")
    missing_coords = df[['Latitude', 'Longitude']].isna().any(axis=1).sum()
    print(f"Properties missing coordinates: {missing_coords}")
    print()

    # Geocode properties without coordinates
    geocoded = 0
    # Successful lookups keyed by cleaned name, so a city shared by many
    # properties is requested from Nominatim only once per run.
    resolved = {}

    for idx, row in df.iterrows():
        # Skip if already has coordinates
        if pd.notna(row.get('Latitude')) and pd.notna(row.get('Longitude')):
            continue

        location = row.get('Locatie', '')
        if not location or pd.isna(location):
            continue

        cache_key = str(location).strip()
        if not cache_key:
            # Whitespace-only name: nothing to look up.
            continue

        print(f"[{idx+1}/{total}] Geocoding: {location}")

        if cache_key in resolved:
            lat, lon = resolved[cache_key]
            print(f"  ↺ {cache_key}: {lat}, {lon} (cached)")
        else:
            lat, lon = geocode_city(location)
            if lat is not None and lon is not None:
                resolved[cache_key] = (lat, lon)
            # Rate limiting - be respectful to Nominatim. Only needed after a
            # real request, not after a cache hit.
            time.sleep(1)

        # Compare against None explicitly: 0.0 is a valid coordinate.
        if lat is None or lon is None:
            continue

        df.at[idx, 'Latitude'] = lat
        df.at[idx, 'Longitude'] = lon
        df.at[idx, 'ExtractedLocation'] = location
        df.at[idx, 'LocationSource'] = 'city_geocode'
        geocoded += 1

        # Save progress every 10
        if geocoded % 10 == 0:
            df.to_csv(output_path, index=False)
            print(f"  💾 Progress saved ({geocoded} geocoded)")

    # Final save
    df.to_csv(output_path, index=False)

    print()
    print("✅ Geocoding complete!")
    print(f"   Successfully geocoded: {geocoded}")
    print(f"   Total with coordinates: {df[['Latitude', 'Longitude']].notna().all(axis=1).sum()}")
    print()
    print("📝 Note: These are approximate city/region coordinates.")
    print("   Properstar hides exact addresses until contact.")

# Run the geocoding pass only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
