import asyncio
import csv
import json
import os
import sys
from pathlib import Path

from playwright.async_api import async_playwright

# Page whose listing cards are scraped.
FAVORITES_URL = "https://www.properstar.nl/favorites"
# CSV file the scraped listings are written to.
OUTPUT_CSV = "extracted_property_urls.csv"
# Playwright storage-state file used to reuse a logged-in session between runs.
AUTH_JSON = "auth.json"

# Origin used to turn site-relative hrefs/srcs into absolute URLs.
_BASE_URL = "https://www.properstar.nl"
# CSS selector matching one listing card on the favorites page.
_LISTING_SELECTOR = "article.item-adaptive.card-basic"
# Column order of the output CSV; also the keys of every scraped row.
_CSV_FIELDS = ["Property URL", "Locatie", "Prijs", "Thumbnail"]
# JSON file holding URLs the user removed by hand.
_REMOVED_FILE = "removed_properties.json"


def _absolute_url(link):
    """Prefix a site-relative link with the Properstar origin."""
    return f"{_BASE_URL}{link}" if link.startswith("/") else link


async def _extract_listing(listing):
    """Build the CSV row for one listing card.

    Returns:
        A dict keyed by ``_CSV_FIELDS``, or ``None`` when the card is
        archived or carries no usable property link.
    """
    # get_attribute returns None when the attribute is absent.
    class_attr = await listing.get_attribute("class") or ""
    if "archived" in class_attr:
        return None

    # Try the new /listing/ pattern first, fall back to /property-for-sale.
    url_element = await listing.query_selector("a[href*='/listing/']")
    if not url_element:
        url_element = await listing.query_selector("a[href*='/property-for-sale']")
    if not url_element:
        return None

    href = await url_element.get_attribute("href")
    if not href:
        return None

    location_element = await listing.query_selector("div.item-location")
    location = await location_element.inner_text() if location_element else ""
    location = location.replace("Het juiste adres aanvragen", "").strip()

    price_element = await listing.query_selector("span[itemprop='price']")
    price_text = await price_element.inner_text() if price_element else ""
    # Keep only the digits so the price is stored as a plain number string.
    price_number = "".join(filter(str.isdigit, price_text))

    thumbnail_url = ""
    img_element = await listing.query_selector("img.item-pic")
    if img_element:
        thumbnail_url = await img_element.get_attribute("src") or ""
        if thumbnail_url:
            thumbnail_url = _absolute_url(thumbnail_url)

    return {
        "Property URL": _absolute_url(href),
        "Locatie": location,
        "Prijs": price_number,
        "Thumbnail": thumbnail_url,
    }


async def _collect_favorites(page):
    """Walk every favorites page and return the rows, de-duplicated by URL."""
    rows = []
    seen_urls = set()
    page_number = 1

    while True:
        print(f"📄 Pagina {page_number} laden...")
        await page.wait_for_timeout(2000)  # short pause so the cards can render

        listings = await page.query_selector_all(_LISTING_SELECTOR)
        if not listings:
            print(f"⛔️ Geen listings gevonden op pagina {page_number}")
            break

        for listing in listings:
            row = await _extract_listing(listing)
            if row is None or row["Property URL"] in seen_urls:
                continue
            seen_urls.add(row["Property URL"])
            rows.append(row)

        next_button = await page.query_selector("a[aria-label='Next page']")
        if not (next_button and await next_button.is_enabled()):
            break
        await next_button.click()
        page_number += 1
        await page.wait_for_load_state("load")

    return rows


async def _open_saved_session(page):
    """Open the favorites page with a stored session.

    Returns:
        ``True`` when listing cards are visible, ``False`` when none are
        found (treated as an expired session; guidance is printed).
    """
    print("🌐 Navigeer naar favorietenpagina...")
    await page.goto(FAVORITES_URL, timeout=60000)
    await page.wait_for_load_state("load")

    # The presence of listing cards is used as the "logged in" signal.
    await asyncio.sleep(2)
    if await page.query_selector_all(_LISTING_SELECTOR):
        print("✅ Ingelogd via opgeslagen sessie")
        return True

    print("❌ SESSIE VERLOPEN - handmatige login vereist")
    print("💡 Acties:")
    print("   1. Verwijder auth.json: rm auth.json")
    print("   2. Of gebruik: python3 validate_session.py invalidate")
    print("   3. Run opnieuw voor handmatige login")
    return False


async def _login_manually(page):
    """Wait up to 10 minutes for the user to log in in the visible browser.

    Returns:
        ``True`` once listing cards are detected and the favorites page has
        been reloaded, ``False`` when the wait times out.
    """
    print("🌐 Bezoek loginpagina van Properstar...")
    await page.goto(FAVORITES_URL, timeout=60000)

    print("⏳ Wacht op handmatige inlog. Log nu in in het geopende venster.")
    print("⏳ Je hebt 10 minuten om in te loggen...")
    print("⏳ Het venster blijft open tot je ingelogd bent!")

    logged_in = False
    for i in range(60):  # 60 polls * 10 s = 10 minutes
        await asyncio.sleep(10)
        try:
            listings = await page.query_selector_all(_LISTING_SELECTOR)
        except Exception:
            # Deliberate best-effort: the page may be mid-navigation while
            # the user logs in, so a failed query just means "not yet".
            listings = []

        if listings:
            logged_in = True
            print("✅ Login gedetecteerd!")
            break

        if i % 6 == 0:  # once per minute
            minutes_left = 10 - (i // 6)
            print(f"⏳ Nog {minutes_left} minuten om in te loggen... (venster blijft open)")

    if not logged_in:
        print("❌ Login timeout - probeer opnieuw")
        return False

    # The session itself is persisted only after scraping has finished.
    print("⏳ Sessie wordt later opgeslagen (na scrapen)...")
    print("⏳ Navigeer naar favorieten...")
    await page.goto(FAVORITES_URL, timeout=60000)
    await page.wait_for_load_state("load")
    await asyncio.sleep(2)
    return True


def _load_removed_urls():
    """Return the set of manually removed URLs, or an empty set if unavailable."""
    removed_file = Path(_REMOVED_FILE)
    if not removed_file.exists():
        return set()
    try:
        with open(removed_file, "r", encoding="utf-8") as f:
            removed = set(json.load(f))
    except (OSError, ValueError, TypeError) as e:
        # ValueError covers malformed JSON and bad encoding; TypeError covers
        # JSON that is not a collection of hashable values.
        print(f"⚠️  Could not load removed_properties.json: {e}")
        return set()
    if removed:
        print(f"\n🚫 Loaded {len(removed)} manually removed URLs")
    return removed


def _save_results(property_data):
    """Drop blacklisted rows and write the remainder to ``OUTPUT_CSV``."""
    if not property_data:
        print("⚠️ Geen data gevonden om op te slaan.")
        return

    removed_urls = _load_removed_urls()
    # Rows are keyed by "Property URL"; filtering on that key is what makes
    # the blacklist effective.
    kept = [row for row in property_data if row["Property URL"] not in removed_urls]
    filtered_count = len(property_data) - len(kept)
    if filtered_count > 0:
        print(f"🗑️  Filtered out {filtered_count} manually removed properties")

    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=_CSV_FIELDS)
        writer.writeheader()
        writer.writerows(kept)
    print(f"\n💾 {len(kept)} unieke vastgoedlinks opgeslagen in {OUTPUT_CSV}")


async def scrape_favorites():
    """Scrape the Properstar favorites list into ``OUTPUT_CSV``.

    When ``AUTH_JSON`` exists, a headless browser reuses that stored session;
    otherwise a visible browser is opened so the user can log in by hand, and
    the session is saved to ``AUTH_JSON`` after scraping.

    Raises:
        SystemExit: with code 1 when the stored session shows no listings,
            so a calling process can detect the failure.
    """
    print("🚀 Stap 1: Scrapen van favorieten starten...\n")

    async with async_playwright() as p:
        auth_exists = os.path.exists(AUTH_JSON)
        if auth_exists:
            print("✅ auth.json gevonden - gebruik opgeslagen sessie")
        else:
            print("⚠️  Geen auth.json gevonden - handmatige login vereist")

        # Headless only when a session exists; manual login needs a window.
        browser = await p.chromium.launch(headless=auth_exists)
        try:
            if auth_exists:
                context = await browser.new_context(storage_state=AUTH_JSON)
                page = await context.new_page()
                if not await _open_saved_session(page):
                    # Non-zero exit code so a calling API knows it failed.
                    sys.exit(1)
            else:
                context = await browser.new_context()
                page = await context.new_page()
                if not await _login_manually(page):
                    return

            property_data = await _collect_favorites(page)
            _save_results(property_data)

            # Persist the session only if the user had to log in this run.
            if not auth_exists:
                print("\n⏳ Sessie opslaan...")
                try:
                    await context.storage_state(path=AUTH_JSON)
                    print("✅ auth.json opgeslagen voor toekomstig gebruik\n")
                except Exception as e:
                    print(f"⚠️  Kan sessie niet opslaan: {e}")
                    print("⚠️  Je moet opnieuw inloggen bij volgende run\n")
        finally:
            # Close the browser on every exit path, including errors.
            await browser.close()

# Run the scraper when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(scrape_favorites())
