import asyncio
import csv
import os
import re
from playwright.async_api import async_playwright

# Page listing the logged-in user's favorite properties; opened by both
# scrape_favorites() and manual_login().
FAVORITES_URL = "https://www.properstar.nl/favorites"
# CSV file that scrape_favorites() writes the collected listings to.
OUTPUT_CSV = "extracted_property_urls.csv"
# Playwright storage-state file: written by manual_login() after a successful
# login and loaded by scrape_favorites() to reuse that session.
AUTH_JSON = "auth.json"

async def scrape_favorites():
    """Scrape the favorites pages and save the listings to ``OUTPUT_CSV``.

    If ``AUTH_JSON`` does not exist, ``manual_login()`` is awaited first; when
    the file is still missing afterwards the function gives up. Otherwise a
    Chromium browser is started with the stored session, ``FAVORITES_URL`` is
    opened and every page is walked by clicking the ``rel='next'`` pagination
    link until no listings or no such link are found.

    For each listing the URL, location and price are collected. Listings
    whose ``class`` attribute contains ``"archived"``, listings without a URL
    and URLs that were already collected are skipped. The result is written
    as a CSV file with the columns ``URL``, ``Locatie`` and ``Prijs``.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    # Function-scope import so this block does not depend on a file-level
    # import that the rest of the module does not need.
    from urllib.parse import urljoin

    print("🚀 Stap 1: Scrapen van favorieten starten...\n")

    if not os.path.exists(AUTH_JSON):
        print("🔐 auth.json niet gevonden. Start handmatige login...")
        await manual_login()
        if not os.path.exists(AUTH_JSON):
            print("❌ Login mislukt. auth.json niet aangemaakt.")
            return

    all_properties = []
    seen_urls = set()

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        # try/finally guarantees the browser is closed on the early return
        # below and when any Playwright call raises while paginating.
        try:
            context = await browser.new_context(storage_state=AUTH_JSON)
            page = await context.new_page()

            try:
                await page.goto(FAVORITES_URL, wait_until="load", timeout=30000)
            except Exception as e:
                print(f"❌ Fout tijdens laden van favorietenpagina: {e}")
                return

            page_number = 1
            while True:
                print(f"📄 Pagina {page_number} laden...")
                # Fixed delay to give the listing cards time to render.
                await page.wait_for_timeout(3000)

                listings = await page.query_selector_all("article.item-adaptive.card-basic")
                if not listings:
                    print(f"⛔️ Geen listings gevonden op pagina {page_number}\n")
                    break

                for listing in listings:
                    href = await _listing_href(listing)
                    if not href:
                        continue
                    # urljoin resolves root-relative hrefs against the site
                    # and leaves already-absolute hrefs untouched, instead of
                    # blindly prefixing the host.
                    full_url = urljoin(FAVORITES_URL, href)
                    if full_url in seen_urls:
                        continue

                    location, price = await _listing_details(listing)
                    all_properties.append({
                        "URL": full_url,
                        "Locatie": location,
                        "Prijs": price,
                    })
                    seen_urls.add(full_url)

                # Look for the "next page" button; stop when there is none.
                next_btn = await page.query_selector("a.pagination-link[rel='next']")
                if not next_btn:
                    break
                await next_btn.click()
                await page.wait_for_timeout(2000)
                page_number += 1
        finally:
            await browser.close()

    if all_properties:
        with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=["URL", "Locatie", "Prijs"])
            writer.writeheader()
            writer.writerows(all_properties)
        print(f"\n💾 {len(all_properties)} unieke vastgoedlinks opgeslagen in {OUTPUT_CSV}")
    else:
        print("⚠️ Geen data gevonden om op te slaan.")


async def _listing_href(listing):
    """Return the raw href of a listing card, or None if it must be skipped."""
    # Skip listings marked as archived.
    class_attr = await listing.get_attribute("class")
    if class_attr and "archived" in class_attr:
        return None

    url_element = await listing.query_selector("a[itemprop='url']")
    if not url_element:
        return None
    return await url_element.get_attribute("href")


async def _listing_details(listing):
    """Return the cleaned ``(location, price)`` strings of a listing card."""
    # Location: drop the "request the exact address" call-to-action text.
    location_el = await listing.query_selector("div.item-location")
    location = await location_el.inner_text() if location_el else ""
    location_clean = location.replace("Het juiste adres aanvragen", "").strip()

    # Price: keep digits only; a missing price element yields "0".
    price_el = await listing.query_selector("span[itemprop='price']")
    price_raw = await price_el.inner_text() if price_el else "€0"
    price_clean = re.sub(r"[^\d]", "", price_raw)

    return location_clean, price_clean

async def manual_login():
    """Open a visible browser for a manual login and save the session.

    The login page is opened and the function waits up to two minutes for a
    link to ``/favorites`` to appear, which is taken as the sign that the
    user is logged in. It then opens ``FAVORITES_URL``, waits up to ten
    seconds for a listing card and stores the browser context's storage
    state in ``AUTH_JSON``.

    Any failure, including a failure to open the login page, is reported
    with ``print`` and not raised. In that case ``AUTH_JSON`` may not have
    been written. The browser is always closed.

    Returns:
        None.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        # Everything after the launch runs inside the try so that a failing
        # context/page creation or initial navigation is also reported and
        # the browser is still closed.
        try:
            context = await browser.new_context()
            page = await context.new_page()
            print("🌐 Openen inlogpagina...")
            await page.goto("https://www.properstar.nl/login", wait_until="load")
            print("⏳ Je hebt 2 minuten om in te loggen...")

            # The favorites link only shows up once the user is logged in.
            await page.wait_for_selector("a[href='/favorites']", timeout=120000)
            await page.goto(FAVORITES_URL)
            await page.wait_for_selector("article.item-adaptive.card-basic", timeout=10000)
            await context.storage_state(path=AUTH_JSON)
            print("✅ auth.json succesvol opgeslagen.")
        except Exception as e:
            print(f"❌ Login of opslag mislukt: {e}")
        finally:
            await browser.close()

# Script entry point: run the scraper coroutine when this file is executed
# directly (not when it is imported).
if __name__ == "__main__":
    asyncio.run(scrape_favorites())
