+
    $Hje                        R t ^ RIHt ^ RIHtHt ^ RIHtHt ^ RI	H
t
 ] ! R R4      4       t] ! R R4      4       t ! R	 R
]4      t ! R R]4      tR# )ad  Property-source contract.

Every source module in this directory exposes one class that implements
`Source`. The orchestrator (search_v2.py) discovers them, calls health()
to gate pre-flight, then iterates search() per campaign.

Adding a new platform = drop a new file in here + register it in
sources/__init__.py registry + add to campaigns.yaml. No orchestrator
changes.

Design constraints (6L):
- Sources MUST NOT touch the store directly; they yield normalized dicts
  and the orchestrator handles persistence + dedup.
- Sources SHOULD apply upstream criteria filters in their URL/payload when
  the platform supports it. Downstream filtering is wasteful.
- Sources MUST distinguish 'genuinely no results' from 'query failed'.
  health() should fail loud when the platform is unreachable, so the
  orchestrator can skip cleanly rather than record empty results.
)annotations)ABCabstractmethod)	dataclassfield)Iteratorc                      ] tR t^t$ RtR]R&   RtR]R&   RtR]R&   RtR]R	&   Rt	R]R
&   Rt
R]R&   RtR]R&   RtR]R&   ^tR]R&   RtR]R&   RtR# )SearchCriteriazUnified search filters. Sources translate these into platform-specific URLs/payloads.

Attributes left as None mean 'no constraint'. country is required because
most platforms are single-country.
strcountryN
str | Noneregion
department
int | None	min_price	max_pricemin_land_m2min_building_m2min_bedroomsint	max_pageslimit )__name__
__module____qualname____firstlineno____doc____annotations__r   r   r   r   r   r   r   r   r   __static_attributes__r       U/Users/jonathan/Documents/Zakelijk/ClaudeOS/03_Lab/farmmatch/scraper/sources/_base.pyr	   r	      sh    
 LFJ!J
! Iz  Iz "K""&OZ&#L*#IsE:r    r	   c                      ] tR t^.t$ RtR]R&   R]R&   RtR]R&   RtR]R	&   RtR]R
&   Rt	R]R&   Rt
R]R&   RtR]R&   RtR]R&   RtR]R&   RtR]R&   RtR]R&   ]! ]R7      tR]R&   R R ltRtR# )PropertyHita  Normalized property record yielded by Source.search().

Only `url` and `source` are required. Everything else is best-effort.
The orchestrator upserts these into the store; downstream pipeline steps
(geocode, amenities, analyze, enrich, score) fill the rest.
r
   urlsourceNr   titler   pricecityr   building_size	land_sizebedroomsrooms	thumbnailsearch_region)default_factorydictextrac                   V ^8  d   QhRR/# )   returnr0   r   )formats   "r!   __annotate__PropertyHit.__annotate__D   s     
 
 
r    c                    RV P                   RV P                  /pR F  p\        W4      pVf   K  W1V&   K  	  V P                  '       d   VP	                  V P                  4       V# )z6Convert to the flat dict shape store.upsert() expects.r$   r%   )
r&   r'   r(   r   r)   r*   r+   r,   r-   r.   )r$   r%   getattrr1   update)selfoutkvs   &   r!   to_store_fieldsPropertyHit.to_store_fieldsD   s\    dhh$++6EA A}A	E
 :::JJtzz"
r    r   )r   r   r   r   r   r   r&   r'   r(   r   r)   r*   r+   r,   r-   r.   r   r0   r1   r?   r   r   r    r!   r#   r#   .   s     
HKE:E:D*GZ $M:$ Iz HjE: Iz  $M:$-E4-
 
r    r#   c                  n    ] tR t^Qt$ RtR]R&   R]R&   RtR]R&   ]R	 R
 l4       t]RR R ll4       t	Rt
R# )Sourcez*Contract every property source implements.r
   name	list[str]	countriesFboolrequires_authc                   V ^8  d   QhRR/# r3   r4   ztuple[bool, str]r   )r5   s   "r!   r6   Source.__annotate__Y   s      ( r    c                    R# )a4  Preflight reachability check.

Returns (ok, reason). Called once before any search() calls in a run.
If ok=False, orchestrator SKIPS this source's searches and reports
the reason in the health line. Examples of failure reasons:
'auth expired', 'HTTP 403 (Cloudflare)', 'DataDome CAPTCHA',
'connect timeout'.
Nr   r;   s   &r!   healthSource.healthX       r    Nc               $    V ^8  d   QhRRRRRR/# r3   criteriar	   
known_urlszset[str] | Noner4   zIterator[PropertyHit]r   )r5   s   "r!   r6   rJ   d   s#      ~ *6Kr    c                    R# )ub  Yield matching properties for a single campaign×region combination.

Sources SHOULD respect criteria.limit and criteria.max_pages to bound
the crawl. SHOULD apply upstream filters (price/size) in the URL/payload
when the platform supports it.

Args:
    criteria: filters to apply.
    known_urls: optional set of URLs the orchestrator has already seen.
        Sources should skip these EARLY (before HTTP fetch when possible,
        else before yielding) so that limit semantics measure FRESH
        discoveries, not re-discoveries of cached data. When None,
        the source treats all results as new.
Nr   r;   rR   rS   s   &&&r!   searchSource.searchc   rO   r    r   N)r   r   r   r   r   r   rG   r   rM   rV   r   r   r    r!   rB   rB   Q   s@    4
IM4   r    rB   c                  B    ] tR t^vtRtR R ltR R ltR
R R lltR	tR# )DisabledSourcezBase class for parked sources (e.g. bot-blocked platforms).

Useful for: making blocked sources visible in the orchestrator + yaml
config without requiring conditional imports or special-casing.
c               $    V ^8  d   QhRRRRRR/# )r3   rC   r
   rE   rD   reasonr   )r5   s   "r!   r6   DisabledSource.__annotate__|   s!     ' 'S 'Y ' 'r    c                	*    Wn         W n        W0n        R # rX   )rC   rE   _disabled_reason)r;   rC   rE   r\   s   &&&&r!   __init__DisabledSource.__init__|   s    	" &r    c                   V ^8  d   QhRR/# rI   r   )r5   s   "r!   r6   r]      s     ; ;( ;r    c                	$    R RV P                    23# )Fz
disabled: )r_   rL   s   &r!   rM   DisabledSource.health   s    
4#8#8"9:::r    Nc               $    V ^8  d   QhRRRRRR/# rQ   r   )r5   s   "r!   r6   r]      s#      ~ *6Kr    c                	    \        R4      # )Nr   )iterrU   s   &&&r!   rV   DisabledSource.search   s    Bxr    )r_   rE   rC   rX   )	r   r   r   r   r   r`   rM   rV   r   r   r    r!   rZ   rZ   v   s    
'
; r    rZ   N)r   
__future__r   abcr   r   dataclassesr   r   typingr   r	   r#   rB   rZ   r   r    r!   <module>rm      sd   & # # (    $   D"S "JV r    