"""Immonot.com — Notaires de France property listings.

Picked over pap.fr (Cloudflare-blocked 2026-05-25) because immonot serves
HTML cleanly to standard requests and represents a distinct market slice:
notary-listed properties, often estate sales / family transfers / divorce
splits — direct seller-to-buyer with the notary as intermediary. No agent
commission. Frequently the cheapest comparable properties in a département.

URL pattern (verified 2026-05-25):
  https://www.immonot.com/recherche-annonces-par-departement/VENT/MAIS/{dept_code}/Achat-Maison-{dept_slug}.html
  VENT = vente (sale, vs LOC = location/rent)
  MAIS = maison (vs APPT = appartement, TERR = terrain)

Card structure: div.il-card-content per listing, with:
  - a.reset-link[href^="/annonce-..."] (detail URL)
  - h2.il-card-title > strong.il-card-locale (city + postcode)
  - div.il-card-price > strong (price text)
  - Card text contains: "Intérieur X m 2", "Extérieur Y m", "Pièces N", "Chb. M"

Pagination not yet handled — first page returns ~12 results per département,
which is enough for the weekly cadence and small enough to not annoy the site.
"""
from __future__ import annotations

import re
import sys
import time
from pathlib import Path
from typing import Iterator

import requests
from bs4 import BeautifulSoup

# Despite its name, SCRIPT_DIR is the *parent* of this file's directory (two
# `.parent` hops up from the resolved file path). It is put at the front of
# sys.path, once, so that the absolute `sources._base` import below resolves.
# NOTE(review): presumably this file lives inside a `sources/` package and is
# sometimes loaded without the project root on sys.path — confirm against the
# repository layout.
SCRIPT_DIR = Path(__file__).resolve().parent.parent
if str(SCRIPT_DIR) not in sys.path:
    sys.path.insert(0, str(SCRIPT_DIR))

from sources._base import PropertyHit, SearchCriteria, Source  # noqa: E402

# Region slug → (département INSEE code, département slug for the URL)
# The keys are the values search() accepts in `criteria.region`; any region not
# listed here makes search() return without issuing a request. The tuple feeds
# the {code} and {slug} placeholders of SEARCH_URL_TEMPLATE. In every current
# entry the URL slug is identical to the key.
REGION_MAP = {
    'drome':           ('26', 'drome'),
    'ardeche':         ('07', 'ardeche'),
    'herault':         ('34', 'herault'),
    'gard':            ('30', 'gard'),
    'cotes-d-armor':   ('22', 'cotes-d-armor'),
    'morbihan':        ('56', 'morbihan'),
    'finistere':       ('29', 'finistere'),
    'dordogne':        ('24', 'dordogne'),
    'lot':             ('46', 'lot'),
    'creuse':          ('23', 'creuse'),
    'correze':         ('19', 'correze'),
    'charente':        ('16', 'charente'),
    'charente-maritime': ('17', 'charente-maritime'),
    # Added 2026-05-27 — Massif Central + Pyrenean + south-Languedoc widening
    'cantal':          ('15', 'cantal'),
    'aveyron':         ('12', 'aveyron'),
    'lozere':          ('48', 'lozere'),
    'tarn':            ('81', 'tarn'),
    'tarn-et-garonne': ('82', 'tarn-et-garonne'),
    'pyrenees-atlantiques': ('64', 'pyrenees-atlantiques'),
    'hautes-pyrenees': ('65', 'hautes-pyrenees'),
    'aude':            ('11', 'aude'),
    'pyrenees-orientales': ('66', 'pyrenees-orientales'),
}

# Site origin; also prefixed to the relative detail-page hrefs found in cards.
BASE = 'https://www.immonot.com'
# Per-département results page for houses for sale (VENT/MAIS, see the module
# docstring). {code} and {slug} are filled from a REGION_MAP entry.
SEARCH_URL_TEMPLATE = BASE + '/recherche-annonces-par-departement/VENT/MAIS/{code}/Achat-Maison-{slug}.html'

# Desktop Safari User-Agent string sent with every request of the session.
UA = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
      'AppleWebKit/605.1.15 (KHTML, like Gecko) '
      'Version/17.4 Safari/605.1.15')

# Regexes for the card text body (city + price come from CSS selectors)
# Group 1 of the two area patterns is the raw number text; it may contain
# digits, ASCII spaces, commas and dots and is converted to an int by search().
# Interior surface: "Intérieur <number> m" followed by "²" or "2".
RE_SURFACE = re.compile(r'Int[ée]rieur\s+([\d ,.]+)\s*m\s*[²2]')
# Land surface: "Extérieur <number> m" (no exponent required).
RE_LAND = re.compile(r'Ext[ée]rieur\s+([\d ,.]+)\s*m')
# Room count: "Pièces <n>".
RE_ROOMS = re.compile(r'Pi[èe]ces\s+(\d+)')
# Bedroom count: "Chb. <n>".
RE_BEDS = re.compile(r'Chb\.\s*(\d+)')
# Price: a run of digits, optionally grouped by whitespace, right before "€".
RE_PRICE_DIGITS = re.compile(r'(\d[\d\s]*)\s*€')
# "<city> - <5-digit postcode>"; the city group is lazy and the pattern is
# anchored at both ends, so hyphens inside the city name stay in group 1.
RE_CITY_POSTCODE = re.compile(r'^(.+?)\s*-\s*(\d{5})\s*$')


def _to_int(text: str) -> int | None:
    """Strip spaces/commas/dots, return int or None."""
    if not text:
        return None
    digits = re.sub(r'[^\d]', '', text)
    return int(digits) if digits else None


class ImmonotSource(Source):
    """Source adapter for immonot.com notary listings (houses for sale, France).

    Each search() call fetches the first results page of one département
    listed in REGION_MAP and yields one PropertyHit per listing card that
    passes the criteria filters. Pagination is not handled.
    """

    name = 'immonot'
    countries = ['FR']
    requires_auth = False

    def __init__(self):
        """Create the HTTP session shared by health() and search()."""
        self._session = requests.Session()
        # These headers are sent with every request made through the session.
        self._session.headers.update({
            'User-Agent': UA,
            'Accept': 'text/html,application/xhtml+xml',
            'Accept-Language': 'fr-FR,fr;q=0.9',
        })

    def health(self) -> tuple[bool, str]:
        """Probe the site root with a GET and report whether it is reachable.

        A GET is used because HEAD returns 405 on immonot.

        Returns:
            ``(True, 'reachable')`` for any final status below 400, otherwise
            ``(False, reason)`` where reason is ``'HTTP <status>'`` or the
            class name of the ``requests`` exception that was raised.
        """
        try:
            r = self._session.get(BASE + '/', timeout=8, allow_redirects=True)
        except requests.RequestException as e:
            return False, f'{type(e).__name__}'
        if r.status_code >= 400:
            return False, f'HTTP {r.status_code}'
        return True, 'reachable'

    @staticmethod
    def _parse_area(raw: str) -> int | None:
        """Convert the number text captured by RE_SURFACE / RE_LAND to whole m².

        Spaces are treated as digit grouping and a comma as the decimal
        separator; the fractional part is truncated ("200,35" → 200,
        "1 200" → 1200). When a comma is present, dots can only be grouping
        marks and are dropped, so "1.200,35" → 1200 instead of being lost.

        Returns:
            The truncated integer value, or None when the text is not a
            parseable number.
        """
        cleaned = raw.replace(' ', '')
        if ',' in cleaned:
            cleaned = cleaned.replace('.', '').replace(',', '.')
        try:
            return int(float(cleaned))
        except ValueError:
            return None

    def search(self, criteria: SearchCriteria,
               known_urls: set[str] | None = None) -> Iterator[PropertyHit]:
        """Yield listings from the first results page of one département.

        Args:
            criteria: Search parameters. This method reads ``country``,
                ``region``, ``limit``, ``min_price``, ``max_price``,
                ``min_building_m2``, ``min_land_m2`` and ``min_bedrooms``.
            known_urls: Detail URLs that were already seen. Matching cards
                are skipped and do not count toward ``criteria.limit``.

        Yields:
            One PropertyHit per card that has a detail link and passes the
            filters. A card whose price cannot be parsed bypasses the price
            filters; a missing surface, land size or bedroom count is treated
            as 0 by the corresponding minimum filter.

        Nothing is yielded, and no request is made, when the country is not
        'FR' or the region is not a REGION_MAP key. A ``requests`` error or a
        non-200 response is reported with print() and ends the search.
        """
        if criteria.country != 'FR':
            return  # immonot is France-only
        if not criteria.region or criteria.region not in REGION_MAP:
            return  # unknown département slug
        known = known_urls or set()

        code, slug = REGION_MAP[criteria.region]
        url = SEARCH_URL_TEMPLATE.format(code=code, slug=slug)
        try:
            r = self._session.get(url, timeout=20)
        except requests.RequestException as e:
            print(f"    immonot: {type(e).__name__} on {criteria.region}")
            return
        finally:
            # Be polite — at most one département request per second. The
            # pause sits right after the request so it also applies when the
            # request fails, the status is not 200, or the caller stops
            # consuming this generator before it is exhausted.
            time.sleep(1)
        if r.status_code != 200:
            print(f"    immonot: HTTP {r.status_code} on {criteria.region}")
            return

        soup = BeautifulSoup(r.text, 'html.parser')

        yielded = 0
        for card in soup.select('div.il-card-content'):
            if criteria.limit and yielded >= criteria.limit:
                break

            link = card.select_one('a.reset-link[href^="/annonce-"]')
            if not link:
                continue
            href = link.get('href', '')
            prop_url = BASE + href if href.startswith('/') else href
            if prop_url in known:
                continue  # skip already-seen — saves parsing + keeps limit semantics honest

            # City from .il-card-locale ("<city> - <postcode>"); stays None
            # when the element is missing or its text has another shape.
            city = None
            locale_el = card.select_one('strong.il-card-locale')
            if locale_el:
                m = RE_CITY_POSTCODE.match(locale_el.get_text(strip=True))
                if m:
                    city = m.group(1).strip()

            # Price from .il-card-price
            price = None
            price_el = card.select_one('div.il-card-price')
            if price_el:
                m = RE_PRICE_DIGITS.search(price_el.get_text(' ', strip=True))
                if m:
                    price = _to_int(m.group(1))

            # Price filters run before the remaining parsing so rejected
            # cards cost as little as possible.
            if price is not None:
                if criteria.min_price and price < criteria.min_price:
                    continue
                if criteria.max_price and price > criteria.max_price:
                    continue

            # Surface, land, rooms, bedrooms come from the flattened card
            # text. No-break and narrow no-break spaces are mapped to plain
            # spaces first: the area patterns only accept an ASCII space as
            # digit grouping, so "1\xa0500 m" would otherwise not be parsed.
            card_text = (card.get_text(' ', strip=True)
                         .replace('\xa0', ' ')
                         .replace('\u202f', ' '))

            m = RE_SURFACE.search(card_text)
            building_size = self._parse_area(m.group(1)) if m else None

            m = RE_LAND.search(card_text)
            land_size = self._parse_area(m.group(1)) if m else None

            m = RE_ROOMS.search(card_text)
            rooms = int(m.group(1)) if m else None

            m = RE_BEDS.search(card_text)
            bedrooms = int(m.group(1)) if m else None

            # Size + bedroom filters: an unknown value counts as 0.
            if criteria.min_building_m2 and (building_size or 0) < criteria.min_building_m2:
                continue
            if criteria.min_land_m2 and (land_size or 0) < criteria.min_land_m2:
                continue
            if criteria.min_bedrooms and (bedrooms or 0) < criteria.min_bedrooms:
                continue

            yield PropertyHit(
                url=prop_url,
                source='immonot',
                title=f"{city or criteria.region.title()} - immonot",
                price=price,
                city=city,
                country='FR',
                building_size=building_size,
                land_size=land_size,
                rooms=rooms,
                bedrooms=bedrooms,
                search_region=criteria.region,
            )
            yielded += 1
