#!/bin/bash

# ============================================================================
# FARMMATCH FULL UPDATE SCRIPT
# ============================================================================
# One-click script to fully update the property database from Properstar
#
# This script runs the complete pipeline:
# 1. Scrape favorites from Properstar (with manual login if needed)
# 2. Extract breadcrumbs and location data
# 3. Remove 404/410 dead properties
# 4. Extract GPS coordinates from embedded maps
# 5. Geocode properties to GPS coordinates
# 6. Analyze properties against criteria
# 7. Update enriched_data.json for map viewer
#
# Usage: ./full_update.sh
# ============================================================================

set -e  # Exit on error

# Color codes for output (expanded by `echo -e` at print time)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
MAGENTA='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color

# Get script directory and work from it, so every relative path used below
# (helper scripts, CSV files, the venv) resolves the same way regardless of
# the directory the script was invoked from.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd "$SCRIPT_DIR"

# Prefer venv Python if available, otherwise fall back to python3 on PATH.
# This must happen after the cd above: the candidates are relative paths and
# PY_BIN is only ever executed from the script directory, so probing them
# from the caller's directory would test the wrong location.
PY_BIN="python3"
for py_candidate in "../venv/bin/python3.14" "../venv/bin/python3"; do
  if [ -x "$py_candidate" ]; then
    PY_BIN="$py_candidate"
    break
  fi
done

# Startup banner: three colored lines followed by one blank line.
# printf's %b expands backslash escapes the same way `echo -e` does, and the
# format string is reused for each argument, so every argument becomes a line.
printf '%b\n' \
  "${CYAN}============================================================================${NC}" \
  "${CYAN}🚀 FARMMATCH FULL UPDATE - Starting Complete Pipeline${NC}" \
  "${CYAN}============================================================================${NC}" \
  ""

# ============================================================================
# CHECK FOR RUNNING PROCESSES
# ============================================================================
# Looks for python3 processes whose command line mentions one of the pipeline
# scripts. If any are found, the user is asked whether to run
# ./cleanup_processes.sh; any answer other than y/Y cancels the update.
echo -e "${BLUE}🔍 Checking for running Paradisomatch processes...${NC}"

# `|| true`: the final grep exits 1 when nothing matches, which must not trip set -e.
RUNNING_PROCESSES=$(ps aux | grep -E "(favorites_scraper|extract_breadcrumbs|extract_gps|geocode_|analyze_from_urls)" | grep python3 | grep -v grep || true)

if [ -n "$RUNNING_PROCESSES" ]; then
    echo -e "${YELLOW}⚠️  Found running Paradisomatch processes:${NC}"
    # -r keeps backslashes in command lines from being eaten by read.
    while read -r process_line; do
        echo -e "${YELLOW}   $process_line${NC}"
    done <<< "$RUNNING_PROCESSES"
    echo ""
    echo -e "${YELLOW}These processes may interfere with the update.${NC}"
    # The prompt is quoted so that `continue?` and `[y/N]:` are not treated as
    # glob patterns. `|| true` lets a read at EOF (no terminal) fall through to
    # the "cancelled" branch with its message instead of dying under set -e.
    REPLY=""
    read -p "$(echo -e "${YELLOW}Kill these processes and continue? [y/N]: ${NC}")" -n 1 -r || true
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        echo -e "${BLUE}🧹 Cleaning up processes...${NC}"
        ./cleanup_processes.sh
        echo ""
    else
        echo -e "${RED}❌ Update cancelled. Please stop the running processes manually.${NC}"
        exit 1
    fi
else
    echo -e "${GREEN}✅ No conflicting processes found${NC}"
    echo ""
fi

# ============================================================================
# STEP 1: Scrape Favorites from Properstar
# ============================================================================
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BLUE}📥 STEP 1/7: Scraping Favorites from Properstar${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""

if [ -f "auth.json" ]; then
    echo -e "${YELLOW}⚠️  Found existing auth.json - will try to use saved session${NC}"
    echo -e "${YELLOW}⚠️  If login fails, the script will prompt for manual login${NC}"
    echo ""
fi

# The command is tested directly in the `if`: with `set -e` active, a separate
# `$?` check on the next line would never run because the shell exits first,
# and the troubleshooting tips below would never be shown.
if ! "$PY_BIN" favorites_scraper.py; then
    echo ""
    echo -e "${RED}❌ Favorites scraping failed!${NC}"
    echo -e "${YELLOW}💡 Tips:${NC}"
    echo -e "${YELLOW}   - Make sure you logged in to Properstar in the browser window${NC}"
    echo -e "${YELLOW}   - Check if favorites_scraper.py exists${NC}"
    echo -e "${YELLOW}   - Try removing auth.json and running again: rm auth.json && ./full_update.sh${NC}"
    exit 1
fi

echo ""
echo -e "${GREEN}✅ Step 1 Complete: Favorites scraped successfully${NC}"
echo ""

# Ensure analysis_output.csv exists with URL column (skeleton from extracted_property_urls.csv if needed)
if [ ! -f "analysis_output.csv" ]; then
    echo -e "${YELLOW}ℹ️  analysis_output.csv not found; creating skeleton from extracted_property_urls.csv${NC}"
    # Run the snippet with the interpreter selected at the top of the script
    # (venv first) rather than whatever `python3` is on PATH, so the pandas
    # import is resolved by the same interpreter as the other pipeline steps.
    # A SystemExit raised below gives a non-zero status, which stops the
    # script via set -e.
    "$PY_BIN" - <<'PY'
import pandas as pd
from pathlib import Path
csv_path = Path("extracted_property_urls.csv")
if not csv_path.exists():
    raise SystemExit("extracted_property_urls.csv missing")
df = pd.read_csv(csv_path)
if 'Property URL' not in df.columns:
    raise SystemExit("Property URL column missing in extracted_property_urls.csv")
# One row per scraped URL; the remaining columns start out empty.
out = pd.DataFrame({
    "URL": df["Property URL"],
    "Titel": "",
    "Samenvatting": "",
    "GPT Analyse": "",
    "Gewogen Score": ""
})
out.to_csv("analysis_output.csv", index=False)
print(f"Created analysis_output.csv with {len(out)} rows")
PY
    echo ""
fi

# Count properties: number of lines in analysis_output.csv starting with "https://".
# `grep -c` already prints "0" when nothing matches but exits with status 1,
# so `|| echo "0"` would append a second "0" to the value. `|| true` only
# neutralises the exit status; the default covers an unreadable/missing file,
# where grep prints no count at all.
PROP_COUNT=$(grep -c "^https://" analysis_output.csv 2>/dev/null || true)
PROP_COUNT=${PROP_COUNT:-0}
echo -e "${CYAN}📊 Current property count: ${PROP_COUNT}${NC}"
echo ""

# ============================================================================
# STEP 2: Extract Breadcrumbs and Location Data
# ============================================================================
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BLUE}🍞 STEP 2/7: Extracting Breadcrumbs and Location Data${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""

# Tested directly in the `if` so the failure message is actually printed;
# under set -e a separate `$?` check would never be reached.
if ! "$PY_BIN" extract_breadcrumbs.py; then
    echo ""
    echo -e "${RED}❌ Breadcrumb extraction failed!${NC}"
    exit 1
fi

echo ""
echo -e "${GREEN}✅ Step 2 Complete: Breadcrumbs extracted${NC}"
echo ""

# ============================================================================
# STEP 3: Remove 404/410 Dead Properties
# ============================================================================
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BLUE}🗑️  STEP 3/7: Removing 404/410 Dead Properties${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""

# This step is best-effort: a failure is reported as a warning and the
# pipeline continues. The command is tested directly in the `if` because,
# under set -e, a failing command followed by a `$?` check would abort the
# whole script instead. It runs with the interpreter chosen at the top of the
# script (venv first), like the other pipeline steps.
if "$PY_BIN" -c "
import sys
sys.path.insert(0, '.')
from check_availability import remove_404_properties
remove_404_properties(backup=True)
"; then
    echo ""
    echo -e "${GREEN}✅ Step 3 Complete: Dead properties removed${NC}"
else
    echo ""
    echo -e "${YELLOW}⚠️  404 removal had issues (might be no 404s to remove)${NC}"
fi
echo ""

# Update count after removal.
# `grep -c` prints "0" itself on no match (exit status 1), so only the status
# is neutralised here; the default covers a missing/unreadable file.
PROP_COUNT=$(grep -c "^https://" analysis_output.csv 2>/dev/null || true)
PROP_COUNT=${PROP_COUNT:-0}
echo -e "${CYAN}📊 Active property count: ${PROP_COUNT}${NC}"
echo ""

# ============================================================================
# STEP 4: Extract GPS Coordinates from Embedded Maps
# ============================================================================
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BLUE}📍 STEP 4/7: Extracting GPS from Embedded Maps${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""

# Best-effort step: on failure, warn and keep going. Testing the command in
# the `if` is what makes "continuing anyway" true - with set -e, a failing
# command followed by a separate `$?` check would terminate the script.
if "$PY_BIN" extract_gps_and_kpis.py; then
    echo ""
    echo -e "${GREEN}✅ Step 4 Complete: GPS coordinates extracted${NC}"
else
    echo ""
    echo -e "${YELLOW}⚠️  GPS extraction had issues (continuing anyway)${NC}"
fi
echo ""

# ============================================================================
# STEP 5: Geocode Properties to GPS Coordinates
# ============================================================================
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BLUE}🌍 STEP 5/7: Geocoding Properties${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""

# Best-effort step: on failure, warn and keep going. Testing the command in
# the `if` is what makes "continuing anyway" true - with set -e, a failing
# command followed by a separate `$?` check would terminate the script.
if "$PY_BIN" geocode_with_breadcrumbs.py; then
    echo ""
    echo -e "${GREEN}✅ Step 5 Complete: Properties geocoded${NC}"
else
    echo ""
    echo -e "${YELLOW}⚠️  Geocoding had issues (continuing anyway)${NC}"
fi
echo ""

# ============================================================================
# STEP 6: Analyze Properties Against Criteria
# ============================================================================
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BLUE}🎯 STEP 6/7: Analyzing Properties Against Criteria${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""

# Check if criteria API is running (lsof prints nothing and fails when no
# process has port 5002 open); if not, start it in the background with its
# output captured in criteria_api.log.
if ! lsof -ti:5002 > /dev/null 2>&1; then
    echo -e "${YELLOW}⚠️  Criteria API not running on port 5002${NC}"
    echo -e "${YELLOW}Starting criteria_api.py in background...${NC}"
    "$PY_BIN" criteria_api.py > criteria_api.log 2>&1 &
    # Fixed pause to let the API start before the analysis begins.
    sleep 3
fi

# Analysis failures are reported as a warning and the pipeline moves on to
# step 7. The command is tested in the `if` because, with set -e, a failing
# command followed by a separate `$?` check would end the script right here.
if "$PY_BIN" analyze_from_urls_optimized.py; then
    echo ""
    echo -e "${GREEN}✅ Step 6 Complete: Properties analyzed${NC}"
else
    echo ""
    echo -e "${YELLOW}⚠️  Property analysis had issues${NC}"
    echo -e "${YELLOW}Make sure criteria_api.py is running: python3 criteria_api.py${NC}"
fi
echo ""

# ============================================================================
# STEP 7: Update enriched_data.json for Map Viewer
# ============================================================================
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BLUE}🗺️  STEP 7/7: Updating Map Viewer Data${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""

# Tested directly in the `if` so the failure message is actually printed;
# under set -e a separate `$?` check would never be reached.
if ! "$PY_BIN" parse_criteria.py; then
    echo ""
    echo -e "${RED}❌ Map data update failed!${NC}"
    exit 1
fi

echo ""
echo -e "${GREEN}✅ Step 7 Complete: Map viewer data updated${NC}"
echo ""

# ============================================================================
# FINAL SUMMARY
# ============================================================================
echo ""
echo -e "${CYAN}============================================================================${NC}"
echo -e "${GREEN}🎉 FULL UPDATE COMPLETE!${NC}"
echo -e "${CYAN}============================================================================${NC}"
echo ""

# Count final statistics.
# `grep -c` prints "0" itself when nothing matches (exit status 1), so only
# the status is neutralised; the default covers a missing/unreadable file.
PROP_COUNT=$(grep -c "^https://" analysis_output.csv 2>/dev/null || true)
PROP_COUNT=${PROP_COUNT:-0}

# The pandas one-liners run with the interpreter chosen at the top of the
# script (venv first), like the other pipeline steps. Any failure (missing
# column, missing pandas, unreadable CSV) is deliberately reduced to "?"
# because these numbers are informational only.
COORD_COUNT=$("$PY_BIN" -c "
import pandas as pd
df = pd.read_csv('analysis_output.csv')
print(df[df['Latitude'].notna() & df['Longitude'].notna()].shape[0])
" 2>/dev/null || echo "?")

PRICE_COUNT=$("$PY_BIN" -c "
import pandas as pd
df = pd.read_csv('analysis_output.csv')
print(df[df['price'].notna()].shape[0])
" 2>/dev/null || echo "?")

echo -e "${CYAN}📊 Final Statistics:${NC}"
echo -e "${CYAN}   • Total properties: ${PROP_COUNT}${NC}"
echo -e "${CYAN}   • With coordinates: ${COORD_COUNT}${NC}"
echo -e "${CYAN}   • With price data: ${PRICE_COUNT}${NC}"
echo ""

echo -e "${GREEN}✨ Your Paradisomatch database is now fully updated!${NC}"
echo ""
echo -e "${CYAN}Next steps:${NC}"
echo -e "${CYAN}   • Open map viewer: http://localhost:8000/map_viewer_advanced.html${NC}"
echo -e "${CYAN}   • Or run: open http://localhost:8000/map_viewer_advanced.html${NC}"
echo ""

# Ask if user wants to open map viewer.
# The prompt is quoted so that `now?` and `[y/N]:` are not treated as glob
# patterns. `|| true` keeps a read at EOF (no terminal attached) from
# aborting the script under set -e; REPLY then stays empty, which means "no".
REPLY=""
read -p "$(echo -e "${YELLOW}Would you like to open the map viewer now? [y/N]: ${NC}")" -n 1 -r || true
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
    MAP_VIEWER_URL="http://localhost:8000/map_viewer_advanced.html"
    # `open` is tried first; xdg-open is a fallback for systems without it.
    URL_OPENER=""
    if command -v open > /dev/null 2>&1; then
        URL_OPENER="open"
    elif command -v xdg-open > /dev/null 2>&1; then
        URL_OPENER="xdg-open"
    fi
    # Only claim success when the opener actually ran and succeeded.
    if [ -n "$URL_OPENER" ] && "$URL_OPENER" "$MAP_VIEWER_URL"; then
        echo -e "${GREEN}✅ Map viewer opened in browser${NC}"
    else
        echo -e "${YELLOW}⚠️  Could not open a browser automatically. Visit: ${MAP_VIEWER_URL}${NC}"
    fi
fi

echo ""
echo -e "${CYAN}============================================================================${NC}"
