#!/usr/bin/env python3
"""
Extract structured data and keyword signals from property listing pages.

Visits each active property page with Playwright (Cloudflare-protected) and extracts:
- JSON-LD structured data (price, rooms, floor size, year built)
- DPE energy label (A-G) — renovation proxy
- Feature keywords (pool, water source, outbuildings, south-facing, septic)
- Property type classification
- Listing date / days on market
- Structured land size vs building size (from feature tables, not body regex)

Usage:
    python3 enrich_page_data.py                # Enrich all active properties missing data
    python3 enrich_page_data.py --limit 5      # Test on 5 properties
    python3 enrich_page_data.py --dry-run      # Show what would be scraped
    python3 enrich_page_data.py --force        # Re-scrape even if already enriched
    python3 enrich_page_data.py --headless     # Run the browser headless (default: headed)
"""
import argparse
import asyncio
import re
from datetime import datetime

from store import load, save, persist, upsert, is_active, short_url, browser_page, wait_for_cloudflare

# Keyword signals: flag name -> substrings to look for in the page text
# (French/Dutch/English/Spanish/Italian/Portuguese).
#
# scan_keywords() lowercases the page text and tests each entry with a plain
# substring check ("kw in text"), so:
#   - every entry must be lowercase, and
#   - short entries can also hit inside longer, unrelated words.
KEYWORD_SIGNALS = {
    'has_pool': [
        'piscine', 'zwembad', 'swimming pool', 'pool', 'piscina',
    ],
    # Wells, springs, boreholes, ponds, lakes and watercourses.
    'has_water_source': [
        'puits', 'source', 'forage', 'waterput', 'well', 'spring', 'borehole',
        'étang', 'vijver', 'pond', 'lac', 'rivière', 'river', 'stream', 'ruisseau',
    ],
    'has_outbuildings': [
        'grange', 'dépendance', 'annexe', 'barn', 'outbuilding', 'schuur',
        'hangar', 'atelier', 'workshop', 'garage', 'remise', 'pigeonnier',
        'chai', 'séchoir', 'bergerie', 'écurie', 'stable',
    ],
    'has_south_facing': [
        'plein sud', 'sud', 'south-facing', 'south facing', 'orienté sud',
        'exposition sud', 'zuidgericht', 'op het zuiden',
    ],
    # NOTE(review): this list mixes septic-tank terms with mains-sewer terms
    # ('tout-à-l'égout', 'riolering', 'mains drainage') and the generic
    # 'raccordé', so the flag effectively means "drainage/connection is
    # mentioned" rather than "has a septic tank" — confirm intent.
    'has_septic': [
        'assainissement', 'fosse septique', 'septic', 'tout-à-l\'égout',
        'raccordé', 'riolering', 'mains drainage',
    ],
    'has_guest_potential': [
        'gîte', 'chambre d\'hôte', 'b&b', 'bed and breakfast', 'guest house',
        'gastenkamer', 'chambres d\'hôtes', 'maison d\'hôtes', 'holiday rental',
        'location saisonnière', 'vakantiewoning',
    ],
    'has_land_description': [
        'terrain', 'parcelle', 'hectare', 'prairie', 'pré', 'verger',
        'orchard', 'boomgaard', 'potager', 'kitchen garden', 'moestuin',
        'vignoble', 'vineyard', 'wijngaard', 'oliviers', 'olive',
    ],
    # Includes fibre/smart-meter terms ('linky', 'fibre optique', ...) alongside
    # plain electricity terms.
    'has_electricity': [
        'électricité', 'electricité', 'electricity', 'elektriciteit',
        'mains electric', 'raccordé électricité', 'compteur électrique',
        'electric meter', 'electricidad', 'elettricità',
        'linky', 'fiber optic', 'fibre optique', 'glasvezel',
        'elektriciteitsnetwerk', 'rete elettrica', 'red eléctrica',
    ],
    # Includes hot-water terms ('eau chaude', 'chauffe-eau', ...) alongside
    # mains/running-water terms.
    'has_mains_water': [
        'eau courante', 'eau de ville', 'mains water', 'running water',
        'water supply', 'raccordé eau', 'compteur eau', 'water meter',
        'waterleiding', 'stromend water', 'agua corriente', 'acqua corrente',
        'waternetwerk', 'rete idrica', 'red de agua', 'warm water',
        'warmwatertoestel', 'eau chaude', 'hot water', 'chauffe-eau',
    ],
}


def needs_page_enrichment(prop, force=False):
    """Decide whether a property's listing page should be (re)scraped.

    Inactive properties are never scraped. An active property is scraped
    when ``force`` is set, or when it carries no truthy ``page_enriched``
    marker yet.
    """
    if is_active(prop):
        # Forced runs ignore the marker; otherwise only unmarked entries qualify.
        return True if force else not prop.get('page_enriched')
    return False


async def extract_page_data(page, url):
    """Visit a property page and extract all structured + keyword data.

    Navigates ``page`` to ``url`` (30 s timeout, waiting for
    ``domcontentloaded``), pauses 2.5 s, checks ``wait_for_cloudflare`` and
    then runs one in-page JavaScript function that collects everything.

    Returns a ``(data, status)`` tuple:
      - ``(dict, 'ok')`` on success. Keys set by the in-page script (only
        when found, unless noted):
          ``ld_name``, ``ld_description`` (max 1000 chars), ``ld_rooms``,
          ``ld_bedrooms``, ``ld_bathrooms``, ``ld_floor_size``,
          ``ld_lot_size``, ``year_built``, ``ld_price``, ``ld_currency``,
          ``ld_locality``, ``ld_region``, ``ld_country``, ``ld_postcode``,
          ``ld_lat``, ``ld_lon`` — from JSON-LD;
          ``dpe`` — energy label letter;
          ``kv_pairs`` — always set, up to 30 ``{label, value}`` dicts,
          de-duplicated by lowercase label;
          ``feature_texts`` — always set, up to 100 lowercased strings;
          ``photo_urls`` — always set, up to 15 unique URLs;
          ``listing_date``, ``breadcrumb_text``, ``og_type``;
          ``body_text`` — always set, first 10000 chars of the body text.
      - ``(None, 'cloudflare_blocked')`` when ``wait_for_cloudflare`` is falsy.
      - ``(None, 'no_data')`` when the script returns a falsy value.
      - ``(None, <message>)`` on any exception, message cut to 100 chars.
    """
    try:
        await page.goto(url, wait_until='domcontentloaded', timeout=30000)
        # Fixed pause before inspecting the page (presumably to let scripts
        # and the Cloudflare interstitial settle — TODO confirm).
        await page.wait_for_timeout(2500)

        if not await wait_for_cloudflare(page):
            return None, 'cloudflare_blocked'

        # Extract everything in one evaluate call for efficiency.
        # The script below is a normal (non-raw) Python string, so every
        # regex backslash is doubled here ("\\b", "\\s", "\\d") and reaches
        # the browser as a single backslash.
        data = await page.evaluate("""() => {
            const result = {};

            // === JSON-LD structured data ===
            const ldScripts = document.querySelectorAll('script[type="application/ld+json"]');
            for (const s of ldScripts) {
                try {
                    const d = JSON.parse(s.textContent || '{}');
                    // Single product/residence
                    if (d['@type'] && ['Product', 'Residence', 'House', 'Apartment',
                        'SingleFamilyResidence', 'RealEstateListing'].includes(d['@type'])) {
                        if (d.name) result.ld_name = d.name;
                        if (d.description) result.ld_description = (d.description || '').slice(0, 1000);
                        if (d.numberOfRooms) result.ld_rooms = parseInt(d.numberOfRooms);
                        if (d.numberOfBedrooms) result.ld_bedrooms = parseInt(d.numberOfBedrooms);
                        if (d.numberOfBathrooms) result.ld_bathrooms = parseInt(d.numberOfBathrooms);
                        if (d.floorSize) {
                            const fs = typeof d.floorSize === 'object' ? d.floorSize.value : d.floorSize;
                            result.ld_floor_size = parseFloat(fs);
                        }
                        if (d.lotSize) {
                            const ls = typeof d.lotSize === 'object' ? d.lotSize.value : d.lotSize;
                            result.ld_lot_size = parseFloat(ls);
                        }
                        if (d.yearBuilt) result.year_built = parseInt(d.yearBuilt);
                        // Price from offers
                        if (d.offers) {
                            const offer = Array.isArray(d.offers) ? d.offers[0] : d.offers;
                            if (offer.price) result.ld_price = parseFloat(String(offer.price).replace(/[^0-9.]/g, ''));
                            if (offer.priceCurrency) result.ld_currency = offer.priceCurrency;
                        }
                        // Address
                        if (d.address) {
                            const addr = typeof d.address === 'string' ? {} : d.address;
                            if (addr.addressLocality) result.ld_locality = addr.addressLocality;
                            if (addr.addressRegion) result.ld_region = addr.addressRegion;
                            if (addr.addressCountry) result.ld_country = addr.addressCountry;
                            if (addr.postalCode) result.ld_postcode = addr.postalCode;
                        }
                        // Geo
                        if (d.geo) {
                            if (d.geo.latitude) result.ld_lat = parseFloat(d.geo.latitude);
                            if (d.geo.longitude) result.ld_lon = parseFloat(d.geo.longitude);
                        }
                    }
                } catch (e) {}
            }

            // === DPE Energy Label ===
            const dpeSelectors = [
                '[class*="dpe"]', '[class*="energy"]', '[data-dpe]',
                '.energy-label', '.diagnostic', '[class*="diagnostic"]'
            ];
            for (const sel of dpeSelectors) {
                const el = document.querySelector(sel);
                if (el) {
                    const text = el.textContent.trim();
                    const match = text.match(/\\b([A-G])\\b/);
                    if (match) {
                        result.dpe = match[1];
                        break;
                    }
                }
            }
            if (!result.dpe) {
                const body = document.body.textContent;
                const dpePatterns = [
                    /DPE\\s*[:\\s]*([A-G])\\b/i,
                    /[Éé]nergie\\s*[:\\s]*([A-G])\\b/i,
                    /energy\\s*(?:class|label|rating)\\s*[:\\s]*([A-G])\\b/i,
                    /classe\\s*[éé]nerg[ée]tique\\s*[:\\s]*([A-G])\\b/i,
                ];
                for (const pat of dpePatterns) {
                    const m = body.match(pat);
                    if (m) {
                        result.dpe = m[1].toUpperCase();
                        break;
                    }
                }
            }

            // === Key-Value pairs (multi-platform extraction) ===
            const kvPairs = [];
            // Method 1: dt/dd pairs (generic HTML)
            document.querySelectorAll('dt').forEach(dt => {
                const dd = dt.nextElementSibling;
                if (dd && dd.tagName === 'DD') {
                    kvPairs.push({ label: dt.textContent.trim(), value: dd.textContent.trim() });
                }
            });
            // Method 2: Properstar — span.property-key + next sibling span
            document.querySelectorAll('.property-key, [class*="property-key"]').forEach(el => {
                const sib = el.nextElementSibling;
                if (sib) {
                    kvPairs.push({ label: el.textContent.trim(), value: sib.textContent.trim() });
                }
            });
            // Method 3: feature-content divs with two child spans
            document.querySelectorAll('.feature-content').forEach(div => {
                const spans = div.querySelectorAll(':scope > span');
                if (spans.length >= 2) {
                    kvPairs.push({ label: spans[0].textContent.trim(), value: spans[1].textContent.trim() });
                }
            });
            // Method 4: label class + sibling (various platforms)
            document.querySelectorAll('[class*="label"]:not(.property-key)').forEach(el => {
                const sib = el.nextElementSibling;
                if (sib) {
                    const label = el.textContent.trim();
                    const value = sib.textContent.trim();
                    if (label && value && label.length < 40 && value.length < 100) {
                        kvPairs.push({ label, value });
                    }
                }
            });
            // Method 5: th/td pairs (table-based layouts)
            document.querySelectorAll('tr').forEach(tr => {
                const th = tr.querySelector('th');
                const td = tr.querySelector('td');
                if (th && td) {
                    kvPairs.push({ label: th.textContent.trim(), value: td.textContent.trim() });
                }
            });
            // Deduplicate by label
            const seen = new Set();
            result.kv_pairs = kvPairs.filter(kv => {
                const key = kv.label.toLowerCase();
                if (seen.has(key)) return false;
                seen.add(key);
                return kv.label.length > 0 && kv.label.length < 50 && kv.value.length > 0;
            }).slice(0, 30);

            // === Feature table / structured KPIs ===
            const featureEls = document.querySelectorAll(
                '.features li, .property-features li, .detail-features li, ' +
                '.key-features li, .feature-item, ' +
                'table.features td, .specs li, .characteristics li, ' +
                '[class*="feature"] li, [class*="detail"] li'
            );
            const featureTexts = [];
            for (const el of featureEls) {
                featureTexts.push(el.textContent.trim().toLowerCase());
            }
            result.feature_texts = featureTexts.slice(0, 100);

            // === Photo URLs ===
            const photos = [];
            document.querySelectorAll(
                'img[src*="files-api"], img[src*="photo"], img[src*="listing"], ' +
                '[class*="gallery"] img, [class*="slider"] img, [class*="carousel"] img'
            ).forEach(img => {
                const src = img.src || img.dataset.src || '';
                if (src && !src.includes('logo') && !src.includes('avatar') && !src.includes('icon')) {
                    photos.push(src);
                }
            });
            result.photo_urls = [...new Set(photos)].slice(0, 15);

            // === Listing date ===
            const dateSelectors = [
                '[class*="date"]', '[class*="published"]', '[class*="listed"]',
                'time[datetime]', '[data-date]'
            ];
            for (const sel of dateSelectors) {
                const el = document.querySelector(sel);
                if (el) {
                    const dt = el.getAttribute('datetime') || el.textContent.trim();
                    if (dt && dt.match(/\\d{4}/)) {
                        result.listing_date = dt.slice(0, 30);
                        break;
                    }
                }
            }

            // === Property type from breadcrumb or meta ===
            const breadcrumb = document.querySelector('.breadcrumb, nav[aria-label="breadcrumb"]');
            if (breadcrumb) result.breadcrumb_text = breadcrumb.textContent.trim().slice(0, 200);

            const ogType = document.querySelector('meta[property="og:type"]');
            if (ogType) result.og_type = ogType.getAttribute('content');

            // === Full body text for keyword scanning ===
            result.body_text = document.body.textContent.slice(0, 10000);

            return result;
        }""")

        if not data:
            return None, 'no_data'

        return data, 'ok'

    except Exception as e:
        # Best-effort scrape: any failure (navigation timeout, evaluate error,
        # ...) is reported to the caller as a short status string instead of
        # being raised.
        return None, str(e)[:100]


# Label mappings: normalized field key -> lowercase label prefixes.
# Fields are tested in this order and the first field with a matching prefix
# wins, so 'built area' (building size) is reached before the bare 'built'
# prefix used for year_built.
_KV_LABEL_MAP = {
    'bedrooms': ('slaapkamers', 'slaapkamer', 'bedrooms', 'bedroom', 'chambres', 'chambre',
                 'camere da letto', 'camera', 'dormitorios', 'quartos'),
    'bathrooms': ('badkamers', 'badkamer', 'bathrooms', 'bathroom', 'salles de bain',
                  'salle de bain', 'bagni', 'bagno', 'baños', 'banheiros'),
    'rooms': ('kamers', 'kamer', 'rooms', 'room', 'pièces', 'stanze', 'habitaciones'),
    'building_size_m2': ('leven', 'living', 'woonoppervlakte', 'surface habitable',
                         'superficie', 'floor area', 'built area', 'oppervlakte'),
    'land_size_m2': ('kavel', 'kavelmaat', 'perceel', 'terrain', 'terreno', 'land',
                     'plot', 'grond', 'foncier', 'lot size', 'parcelle'),
    'total_size_m2': ('totaal', 'total', 'totale'),
    'year_built': ('bouwjaar', 'année de construction', 'year built', 'anno di costruzione',
                   'año de construcción', 'ano de construção', 'built'),
    'floors': ('vloeren', 'verdiepingen', 'floors', 'étages', 'piani', 'plantas'),
    'condition': ('voorwaarde', 'conditie', 'condition', 'état', 'stato', 'estado'),
    'heating': ('verwarming', 'chauffage', 'heating', 'riscaldamento', 'calefacción'),
    'hot_water': ('warm water', 'eau chaude', 'hot water', 'acqua calda', 'agua caliente'),
    'drainage': ('afvoer', 'riolering', 'assainissement', 'drainage', 'fognatura'),
    'garages': ('garages', 'garage', 'garages (buiten)', 'parking'),
    'property_type': ('type', 'type de bien', 'tipologia'),
}

# Fields stored as the first integer found in the value.
_KV_INT_FIELDS = frozenset(
    ('bedrooms', 'bathrooms', 'rooms', 'floors', 'garages', 'year_built'))
# Fields stored as a float area.
_KV_SIZE_FIELDS = frozenset(
    ('building_size_m2', 'land_size_m2', 'total_size_m2'))

# A size token must START with a digit; otherwise the '.' in values such as
# "ca. 120 m²" or "n.c." would be picked up as the "number".
_KV_SIZE_RE = re.compile(r'(\d[\d.,]*)')
# "1.250" / "12.345.678": dots used purely as thousands separators.
_KV_DOT_THOUSANDS_RE = re.compile(r'\d{1,3}(?:\.\d{3})+')


def _parse_size_number(token):
    """Convert a numeric token from a listing to float, or None if unparseable.

    Conventions:
      - a comma present  -> European style: dots are thousands separators and
        the comma is the decimal mark ("2.500,75" -> 2500.75);
      - dots in groups of exactly three digits -> thousands ("1.250" -> 1250);
      - otherwise the token is read as a plain dot-decimal ("120.5" -> 120.5).
    """
    token = token.strip('.,')  # drop sentence punctuation glued to the number
    if not token:
        return None
    if ',' in token:
        normalized = token.replace('.', '').replace(',', '.')
    elif _KV_DOT_THOUSANDS_RE.fullmatch(token):
        normalized = token.replace('.', '')
    else:
        normalized = token
    try:
        return float(normalized)
    except ValueError:
        # e.g. "1.2.3" or "1,2,3": not a number we can interpret
        return None


def parse_kv_pairs(kv_pairs):
    """Parse structured label→value pairs from listing pages (all platforms, all languages).

    Args:
        kv_pairs: iterable of dicts with ``'label'`` and ``'value'`` strings
            (``None`` or empty is accepted and yields ``{}``).

    Returns:
        dict mapping normalized field keys (see ``_KV_LABEL_MAP``) to values:
        ints for counts and ``year_built`` (accepted range 1400–2030), floats
        for ``*_size_m2`` (land < 100 and building > 5000 are discarded as
        implausible), and the raw value string for every other field. When
        several pairs map to the same field, the last one wins.

    Unparseable numeric values are skipped; this function does not raise on
    malformed listing text.
    """
    fields = {}

    for pair in kv_pairs or ():
        label = (pair.get('label') or '').strip().lower()
        value = (pair.get('value') or '').strip()
        if not label or not value:
            continue

        for field_key, prefixes in _KV_LABEL_MAP.items():
            if not label.startswith(prefixes):
                continue

            if field_key in _KV_INT_FIELDS:
                m = re.search(r'\d+', value)
                if m:
                    number = int(m.group())
                    if field_key == 'year_built' and not 1400 <= number <= 2030:
                        # Implausible year: let later fields have a go at this label.
                        continue
                    fields[field_key] = number
            elif field_key in _KV_SIZE_FIELDS:
                m = _KV_SIZE_RE.search(value)
                size = _parse_size_number(m.group(1)) if m else None
                if size is not None:
                    if field_key == 'land_size_m2' and size < 100:
                        continue  # likely hectares or bad parse
                    if field_key == 'building_size_m2' and size > 5000:
                        continue  # sanity check
                    fields[field_key] = size
            else:
                # Text fields: store as-is
                fields[field_key] = value
            break

    return fields


# Leggett-style bare size tags: "97m2", "529m²", "1,200m2".
_BARE_SIZE_RE = re.compile(r'^(\d[\d.,]*)m[²2]?$')

# Heating terms in priority order (first hit wins). Each is matched with a
# LEADING word boundary only: compounds such as "gasverwarming" or
# "electrical" still count, but terms buried inside another word do not
# ("toilet" and "boiler" both contain "oil").
_HEATING_PATTERNS = tuple(
    (term, re.compile(r'(?<!\w)' + re.escape(term)))
    for term in ('central heating', 'chauffage central', 'gas', 'electric',
                 'oil', 'fioul', 'pompe à chaleur', 'heat pump', 'wood',
                 'poêle', 'fireplace', 'cheminée', 'woodburner')
)


def _safe_float(text):
    """Return ``float(text)``, or None when text is not a valid number."""
    try:
        return float(text)
    except ValueError:
        return None


def parse_features(feature_texts):
    """Parse unstructured feature texts into typed fields (Leggett feature tags, etc.).

    Args:
        feature_texts: iterable of short feature strings (``None``/empty
            entries are ignored; ``None`` as a whole yields ``{}``).

    Returns:
        dict that may contain ``bedrooms``, ``bathrooms``, ``rooms``,
        ``year_built`` (ints), ``building_size_m2``, ``land_size_m2``
        (floats), ``heating`` (the matched term) and ``feature_tags`` (list of
        the stripped texts shorter than 60 chars that are not bare sizes).

    Number conventions (unchanged from the listing formats handled so far):
    bare sizes treat ',' as a thousands separator; labeled m² sizes drop both
    '.' and ','; hectares treat ',' as the decimal mark. Tokens that still do
    not parse as a number are skipped instead of raising.
    """
    fields = {}
    tags = []  # Preserve raw feature tags

    for raw in feature_texts or ():
        original = (raw or '').strip()
        text = original.lower()
        if not text:
            continue

        # Leggett-style bare size patterns: "97m2", "529m2"
        m = _BARE_SIZE_RE.match(text)
        if m:
            val = _safe_float(m.group(1).replace(',', ''))
            if val is not None:
                # First m² is building, second is land (Leggett convention)
                if not fields.get('building_size_m2') and val < 1000:
                    fields['building_size_m2'] = val
                elif not fields.get('land_size_m2') and val >= 100:
                    fields['land_size_m2'] = val
            continue

        # Bedrooms
        m = re.search(r'(\d+)\s*(?:bed|chambre|slaap|camera|dormitorio|quarto)', text)
        if m:
            fields['bedrooms'] = int(m.group(1))

        # Bathrooms
        m = re.search(r'(\d+)\s*(?:bath|salle de bain|badkamer|bagno|baño|banheiro)', text)
        if m:
            fields['bathrooms'] = int(m.group(1))

        # Rooms total (first hit is kept)
        m = re.search(r'(\d+)\s*(?:room|pièce|kamer|stanza|habitaci|comodo)', text)
        if m and not fields.get('rooms'):
            fields['rooms'] = int(m.group(1))

        # Land size (labeled)
        land_m = (re.search(r'(?:terrain|land|perceel|grond|plot|parcelle|foncier).*?(\d[\d.,]*)\s*(?:m²|m2|sqm)', text)
                  or re.search(r'(\d[\d.,]*)\s*(?:m²|m2)\s*(?:terrain|land|perceel|grond|plot)', text))
        if land_m:
            val = _safe_float(land_m.group(1).replace(',', '').replace('.', ''))
            if val is not None and val >= 500:
                fields['land_size_m2'] = val

        # Hectares — 'hectares?' so the plural form matches too; with plain
        # 'hectare' the trailing \b rejected "2 hectares".
        ha_m = re.search(r'(\d[\d.,]*)\s*(?:hectares?|ha)\b', text)
        if ha_m and not fields.get('land_size_m2'):
            val = _safe_float(ha_m.group(1).replace(',', '.'))
            if val is not None:
                fields['land_size_m2'] = val * 10000

        # Building/living size (labeled)
        build_m = (re.search(r'(?:living|habitable|woon|surface|floor|built).*?(\d[\d.,]*)\s*(?:m²|m2|sqm)', text)
                   or re.search(r'(\d[\d.,]*)\s*(?:m²|m2)\s*(?:living|habitable|woon)', text))
        if build_m:
            val = _safe_float(build_m.group(1).replace(',', '').replace('.', ''))
            if val is not None and val < 1000:
                fields['building_size_m2'] = val

        # Year built
        m = re.search(r'(?:built|année|bouwjaar|constru).*?(\d{4})', text)
        if m:
            yr = int(m.group(1))
            if 1400 <= yr <= 2030:
                fields['year_built'] = yr

        # Heating type
        for term, pattern in _HEATING_PATTERNS:
            if pattern.search(text):
                fields['heating'] = term
                break

        # Preserve meaningful tags
        if len(original) < 60 and not re.match(r'^\d+m', text):
            tags.append(original)

    if tags:
        fields['feature_tags'] = tags

    return fields


def scan_keywords(body_text):
    """Map every signal in KEYWORD_SIGNALS to True/False for the given text.

    The text is lowercased once; a signal is True when at least one of its
    keywords occurs in it as a plain substring.
    """
    haystack = body_text.lower()
    return {
        flag: any(needle in haystack for needle in needles)
        for flag, needles in KEYWORD_SIGNALS.items()
    }


def estimate_renovation_score(dpe, year_built):
    """Estimate renovation scope score (1-5) from DPE and year built.

    Returns None when neither input is available. The DPE letter sets the
    base score (A/B=5, C=4, D=3, E=2, F/G=1; unknown or missing letter = 3).
    A build year of 2000 or later adds one point, a build year before 1900
    removes one, and the result is kept within 1..5.
    """
    if not (dpe or year_built):
        return None

    by_label = {'A': 5, 'B': 5, 'C': 4, 'D': 3, 'E': 2, 'F': 1, 'G': 1}
    base = by_label.get(dpe.upper(), 3) if dpe else 3  # 3 = neutral default

    adjustment = 0
    if year_built:
        if year_built >= 2000:
            adjustment = 1
        elif year_built < 1900:
            adjustment = -1

    # Base is already within 1..5, so clamping only trims the adjustment.
    return max(1, min(5, base + adjustment))


async def main():
    """Command-line entry point: enrich stored properties from their listing pages.

    Flow:
      1. Parse CLI flags (--limit, --dry-run, --force, --headless).
      2. Load the store and select URLs via needs_page_enrichment().
      3. With --dry-run, print up to 10 candidate URLs and return.
      4. Otherwise visit each candidate in one browser page, merge the
         extracted fields into the store with upsert(), call save() after
         every 20th candidate, and call persist() at the end if at least one
         property was enriched.

    Field precedence within one property, as implemented below:
      JSON-LD values are written first; KV-pair values are written when the
      STORE has no value for the key; feature-list values are written only
      when neither the store nor the fields collected so far have one.
    """
    parser = argparse.ArgumentParser(description='Enrich properties from listing pages')
    parser.add_argument('--limit', type=int, default=0, help='Max properties to scrape (0=all)')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be scraped')
    parser.add_argument('--force', action='store_true', help='Re-scrape already enriched')
    parser.add_argument('--headless', action='store_true', help='Run headless (default: headed)')
    args = parser.parse_args()

    store = load()
    candidates = [url for url, p in store.items()
                  if needs_page_enrichment(p, args.force)]

    # --limit 0 (the default) is falsy and therefore means "no limit".
    if args.limit:
        candidates = candidates[:args.limit]

    print(f"Properties needing page enrichment: {len(candidates)}")

    if args.dry_run:
        for url in candidates[:10]:
            print(f"  {url}")
        if len(candidates) > 10:
            print(f"  ... and {len(candidates) - 10} more")
        return

    if not candidates:
        print("Nothing to do.")
        return

    enriched = 0
    failed = 0
    # Per-run counters printed in the final summary.
    stats = {'price': 0, 'beds': 0, 'land': 0, 'dpe': 0, 'keywords': 0}

    async with browser_page(headless=args.headless) as page:
        for i, url in enumerate(candidates):
            prop_id = short_url(url, 35)
            print(f"  [{i+1}/{len(candidates)}] {prop_id}...", end=' ', flush=True)

            data, status = await extract_page_data(page, url)

            if not data:
                failed += 1
                print(f"FAIL ({status})")
                await page.wait_for_timeout(1000)
                # NOTE(review): this `continue` also skips the every-20th
                # save() below when the 20th/40th/... candidate fails.
                continue

            # `fields` collects everything to upsert for this URL; the
            # page_enriched timestamp is what needs_page_enrichment() checks.
            fields = {'page_enriched': datetime.now().isoformat()}
            # Short "key=value" snippets for the one-line progress output.
            extracted = []

            # JSON-LD fields
            # Prices of 10000 or less are ignored.
            if data.get('ld_price') and data['ld_price'] > 10000:
                fields['price'] = data['ld_price']
                stats['price'] += 1
                extracted.append(f"price={data['ld_price']:.0f}")
            if data.get('ld_rooms'):
                fields['rooms'] = data['ld_rooms']
            if data.get('ld_bedrooms'):
                fields['bedrooms'] = data['ld_bedrooms']
            if data.get('ld_bathrooms'):
                fields['bathrooms'] = data['ld_bathrooms']
            if data.get('ld_floor_size'):
                fields['building_size_m2'] = data['ld_floor_size']
                extracted.append(f"build={data['ld_floor_size']:.0f}m2")
            if data.get('ld_lot_size'):
                fields['land_size_m2'] = data['ld_lot_size']
                stats['land'] += 1
                extracted.append(f"land={data['ld_lot_size']:.0f}m2")
            if data.get('year_built'):
                fields['year_built'] = data['year_built']
                extracted.append(f"yr={data['year_built']}")
            if data.get('ld_lat') and data.get('ld_lon'):
                fields['lat'] = data['ld_lat']
                fields['lon'] = data['ld_lon']
            if data.get('ld_postcode'):
                fields['postcode'] = data['ld_postcode']
            if data.get('ld_country'):
                fields['country'] = data['ld_country']
            if data.get('ld_description'):
                fields['description'] = data['ld_description']

            # DPE
            if data.get('dpe'):
                fields['dpe'] = data['dpe']
                stats['dpe'] += 1
                extracted.append(f"DPE={data['dpe']}")

            # Structured KV pairs (Properstar labels, etc.)
            # NOTE(review): only the store is consulted here, not `fields`, so
            # a KV value replaces a JSON-LD value set just above (and
            # stats['land'] can be incremented twice for one property).
            # The feature loop below checks both — confirm this is intended.
            kv_fields = parse_kv_pairs(data.get('kv_pairs', []))
            for k, v in kv_fields.items():
                if not store[url].get(k):
                    fields[k] = v
                    if k == 'bedrooms':
                        stats['beds'] += 1
                        extracted.append(f"beds={v}")
                    elif k == 'land_size_m2':
                        stats['land'] += 1
                        extracted.append(f"land={v:.0f}m2")
                    elif k == 'year_built':
                        extracted.append(f"yr={v}")
                    elif k == 'condition':
                        extracted.append(f"cond={v[:20]}")

            # Feature tag parsing (Leggett feature lists, etc.)
            # Lowest priority: fills a key only when neither the store nor
            # `fields` has a truthy value; feature_tags is always stored.
            feature_fields = parse_features(data.get('feature_texts', []))
            for k, v in feature_fields.items():
                if k == 'feature_tags':
                    fields['feature_tags'] = v
                    continue
                if not store[url].get(k) and not fields.get(k):
                    fields[k] = v
                    if k == 'bedrooms':
                        stats['beds'] += 1
                        extracted.append(f"beds={v}")
                    elif k == 'land_size_m2':
                        stats['land'] += 1
                        extracted.append(f"land={v:.0f}m2")

            # Keyword signals
            # Stored as the list of flag names that matched, not as booleans.
            signals = scan_keywords(data.get('body_text', ''))
            active_signals = [k for k, v in signals.items() if v]
            if active_signals:
                fields['keyword_signals'] = active_signals
                stats['keywords'] += 1
                extracted.append(f"kw={len(active_signals)}")

            # Photos
            photo_urls = data.get('photo_urls', [])
            if photo_urls:
                fields['photo_urls'] = photo_urls
                fields['photo_count'] = len(photo_urls)
                extracted.append(f"photos={len(photo_urls)}")

            # Renovation score estimate
            # Year preference: KV pairs, then feature list, then JSON-LD.
            yr = kv_fields.get('year_built') or feature_fields.get('year_built') or data.get('year_built')
            reno = estimate_renovation_score(data.get('dpe'), yr)
            if reno is not None:
                fields['renovation_score'] = reno

            # Listing date
            if data.get('listing_date'):
                fields['listing_date'] = data['listing_date']

            # Property type from breadcrumb
            # Substring match against the lowercased breadcrumb; the first
            # type in this list with a hit wins and replaces any
            # property_type taken from the KV pairs above.
            bc = (data.get('breadcrumb_text') or '').lower()
            for ptype, keywords in [('farm', ['farm', 'ferme', 'boerderij']),
                                     ('chateau', ['château', 'chateau', 'kasteel', 'manor']),
                                     ('house', ['house', 'maison', 'huis', 'villa']),
                                     ('barn', ['barn', 'grange', 'schuur']),
                                     ('land', ['land', 'terrain', 'grond'])]:
                if any(kw in bc for kw in keywords):
                    fields['property_type'] = ptype
                    break

            upsert(store, url, fields)
            enriched += 1

            # Progress line shows at most the first five extracted snippets.
            summary = ', '.join(extracted[:5]) if extracted else 'marked'
            print(f"OK ({summary})")

            # Save every 20 properties
            # (counted by candidate index, not by number of successes)
            if (i + 1) % 20 == 0:
                save(store)
                print(f"  [saved progress]")

            # Fixed 1.5 s pause between page visits.
            await page.wait_for_timeout(1500)

    print(f"\nEnriched {enriched}/{len(candidates)} ({failed} failed)")
    print(f"  New prices: {stats['price']}")
    print(f"  New bedrooms: {stats['beds']}")
    print(f"  New land sizes: {stats['land']}")
    print(f"  DPE labels: {stats['dpe']}")
    print(f"  Keyword signals: {stats['keywords']}")

    # Final write goes through persist() rather than save(); skipped when
    # nothing was enriched.
    if enriched > 0:
        persist(store)


# Script entry point: run the async main() coroutine to completion when the
# file is executed directly (not when it is imported).
if __name__ == '__main__':
    asyncio.run(main())
