+
    j&j!                     8  a  R t$0 t R t^ RIt^ RIt^ RIt^ RIHt ^ RIHt R t	]! ]
4      P                  t]R,          t]R,          tRtRtR	t^ RIt/ t] ^ k RR
 ltR tR R lt]! 4       tRR ltRR ltR tR tR tR tR t RR lt!]RR l4       t"RR lt#R# )u   
Paradisomatch Property Store — Single source of truth.

Usage:
    from store import load, persist, upsert, is_active, detect_source

    store = load()
    upsert(store, 'https://...', {'price': 150000, 'bedrooms': 4})
    persist(store)
N)asynccontextmanager)Pathc           
     .   \        V4      p\        P                  ! \        VP                  4      RVP
                  ,           R,           RR7      w  r# \        P                  ! VRRR7      ;_uu_ 4       p\        P                  ! \        V 4      V^RRR7       R	R	R	4       \        P                  ! W14       R	#   + '       g   i     L(; i  \         d>    \        P                  P                  T4      '       d   \        P                  ! T4       h i ; i)
zWrite JSON atomically: serialise to a temp file in the same dir, then
os.replace() into place. Guarantees readers never see a torn/partial file
even if two writers race (the last replace wins, both files stay valid)..z.tmp)dirprefixsuffixwutf-8encodingF)indentensure_ascii	allow_nanN)r   tempfilemkstempstrparentnameosfdopenjsondump_sanitize_for_jsonreplaceBaseExceptionpathexistsunlink)objpfdtmpfs   &&   Q/Users/jonathan/Documents/Zakelijk/ClaudeOS/03_Lab/paradisomatch/scraper/store.py_atomic_dumpr%      s     	QA3qxx=qvv9KTZ[GBYYr311QII(-qZ_` 2


3 21  77>>#IIcNs*   !C 4%B9C 9C		C /D<Dzproperties.jsonzenriched_data.jsonRemovedzuMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36zMParadisomatch/1.0 (property-search project; https://github.com/paradisomatch)c                   V '       d   \        V 4      M\        pVP                  4       '       g   / pMo\        VRR7      ;_uu_ 4       p\        P
                  ! V4      pRRR4       \        X\        4      '       d$   V Uu/ uF  pRV9   g   K  VR,          VbK  	  ppMTp\        P                  ! V4      \        \        V4      &   V#   + '       g   i     Ls; iu upi )zLoad store as {url: property_dict}. Captures a baseline snapshot used
later by save() to perform a merge-aware write (won't clobber concurrent
writes on fields this process didn't touch).r
   r   Nurl)r   
STORE_PATHr   openr   load
isinstancelist_copydeepcopy_LOAD_BASELINESid)r   r    storer#   dataitems   &     r$   r+   r+   8   s     T

A88::!g&&!99Q<D 'dD!!37I445D=&T%[$&4EIEE!&!6OBuIL '& Js   C
	CC
C	c                   ^ RI p\        V \        4      '       d3   VP                  V 4      '       g   VP	                  V 4      '       d   R# T # \        V \
        4      '       d/   V P                  4        UUu/ uF  w  r#V\        V4      bK  	  upp# \        V \        4      '       d   V  Uu. uF  p\        V4      NK  	  up# V # u uppi u upi )u  Recursively replace NaN/Infinity floats with None.

Python's json.dump emits literal `NaN` and `Infinity` by default — valid in
Python's extended JSON but rejected by browser JSON.parse, which breaks the
map_viewer and shortlist pages on load. Strip them before write so any
upstream NaN (e.g. from pandas.to_dict, float arithmetic on missing values)
can't corrupt the JSON output.
N)	mathr,   floatisnanisinfdictitemsr   r-   )r   r6   kvs   &   r$   r   r   J   s     #u

34::c??tDD#t58YY[A[TQ%a(([AA#t/23s!"1%s33J B3s   6C+C
c                H    V ^8  d   QhR\         R\         R\         R\         /# )   baselinein_memon_diskreturn)r:   )formats   "r$   __annotate__rE   ]   s(     ' 't 'T 'D 'T '    c                X   \         P                  ! V4      p\        V P                  4       4      \        VP                  4       4      ,
          pV F  pVP	                  VR4       K  	  VP                  4        F  w  rVWS9  d   \         P                  ! V4      W5&   K%  V P                  V/ 4      pW5,          p\        V4      p	VP                  4        F2  w  rVP                  V
\        4      pV\        8X  g	   W8w  g   K.  WV
&   K4  	  V F  p
W9  g   K  W9   g   K  K  	  WV&   K  	  V# )u	  Merge in-memory changes onto current on-disk state without clobbering
fields that a concurrent writer changed on disk.

Rules per (url, field):
  - field added/changed in memory vs baseline → take in-memory value (this
    process wrote it)
  - field absent in memory but present on disk → take on-disk value
    (concurrent writer added it; we shouldn't erase it)
  - new URLs in memory → add to disk
  - new URLs on disk → preserve
  - URL deleted in memory (was in baseline) → respect that, remove from disk
N)	r.   r/   setkeyspopr;   getr:   	_SENTINEL)r@   rA   rB   mergeddeleted_urlsr(   mem_pbase_pdisk_pout_pr<   mem_vbase_vs   &&&          r$   _three_way_mergerU   ]   s     ^^G$Fx}}'#fkkm*<<L

3 lln
../FKc2&VHAZZ9-F"eo a	 & A~!* 	 
 s) %* MrF   c                   V'       d   \        V4      M\        p\        P                  \	        V 4      4      pVe   VP                  4       '       g   T pM\        VRR7      ;_uu_ 4       p\        P                  ! V4      pRRR4       \        X\        4      '       d$   V Uu/ uF  pRV9   g   K  VR,          VbK  	  upMTp\        W0V4      p\        P                  ! V4      \        \	        V 4      &   V P                  4        V P                  V4       \!        WB4       \#        R\%        V4       RVP&                   24       R#   + '       g   i     L; iu upi )ak  Write store to disk via a three-way merge against current disk state.

Prevents the single-writer cache race: if another process wrote new fields
(e.g. photo_urls from greenacres_photo_harvest) while this process was
running (e.g. recheck_availability marking availability_checked_at), both
sets of changes survive. Without this merge, last-writer-wins clobbers.
Nr
   r   r(   zSaved  properties to )r   r)   r0   rK   r1   r   r*   r   r+   r,   r-   rU   r.   r/   clearupdater%   printlenr   )	r2   r   r    r@   rM   r#   on_disk_rawr4   rB   s	   &&       r$   saver]      s    T

A""2e9-Hqxxzz!g&&!))A,K ' !d33 3>O+$$%DK%+O9D 	!(7; &+^^F%;5	" 	V	F3v;-qvvh
78 '&Os   *E#	E1EE	c                    V'       d   \        V4      M\        p\        V P                  4       4      p\	        W24       \        R\        V4       RVP                   24       R# )zDExport as list-format JSON for backward compat (map viewer, scorer).z	Exported rW   N)r   ENRICHED_PATHr-   valuesr%   rZ   r[   r   )r2   r   r    propss   &&  r$   export_enrichedrb      sD    T
A E	Ic%j\
9:rF   c                2    \        V 4       \        V 4       R# )z)Save store + export enriched in one call.N)r]   rb   )r2   s   &r$   persistrd      s    KErF   c                r    W9  d   RV/W&   VP                  4        F  w  r4Vf   K  W@V,          V&   K  	  R# )zXMerge fields into a property. Only non-None values, never overwrites unmentioned fields.r(   N)r;   )r2   r(   fieldsr<   r=   s   &&&  r$   upsertrg      s5    
S\
=#JqM rF   c                2    V P                  R4      \        8g  # )z!Check if property is not removed.status)rK   STATUS_REMOVEDprops   &r$   	is_activerm      s    88H//rF   c                <    RV 9   d   R# RV 9   d   R# RV 9   d   R# R# )z Detect property source from URL.zfrenchestateagents.comleggett
properstar	idealistaunknown )r(   s   &r$   detect_sourcert      s&    3&scrF   c                j    V P                  R4      ;'       g    V P                  R4      ;'       g    ^ # )z,Get the best available score for a property.cp_scoreoverall_score)rK   rk   s   &r$   	get_scorerx      s*    88JAA488O#<AAArF   c                P    RV 9   d   V P                  R4      R,          RV # V RV # )zShorten URL for display output./N)split)r(   ns   &&r$   	short_urlr~      s-    %(CZ399S>"bq!<S!W<rF   c                 "   ^ RI Hp V! 4       ;_uu_4       GRj  xL
 pVP                  P                  V R7      G Rj  xL
 pVP	                  \
        R7      G Rj  xL
 pVP                  4       G Rj  xL
 p V5x  VP                  4       G Rj  xL
  RRR4      GRj  xL
  R#  L Ll LP L: L  TP                  4       G Rj  xL 
  i ; i L.  + GRj  xL 
 '       g   i     R# ; i5i)z*Shared Playwright browser context manager.)async_playwrightN)headless)
user_agent)playwright.async_apir   chromiumlaunchnew_context	CHROME_UAnew_pageclose)r   r   r    browsercontextpages   &     r$   browser_pager      s      6!!!Q

))8)<<++y+AA%%''	"J--/!! "!!<A' "'--/!! "!!!s   C2B+C2 C B-CB/C6B17C<B5CB3CC2$C%C2-C/C1C3C5C	C
CCC2C/	C
C/	'C/	)	C2c                &  "   V P                  4       G Rj  xL
 pRVP                  4       9   g   RVP                  4       9   dE   V P                  V4      G Rj  xL
  V P                  4       G Rj  xL
 pRVP                  4       9  # R#  Lt L3 L5i)zFWait for Cloudflare challenge to resolve. Returns True if page loaded.Nzjust a momentcheckingT)titlelowerwait_for_timeout)r   
timeout_msr   s   && r$   wait_for_cloudflarer      so     **,E%++-':+F##J///jjl"ekkm33 /"s4   BBABBB1B2BBBc                @    V ^8  d   Qh/ ^ \         9   d
   \        ;R&   # )r?   r0   )__conditional_annotations__r:   )rD   s   "r$   rE   rE      s      	 	f   g	rF   )N)(   )F)i@  )%r   __doc__r   r   r   
contextlibr   pathlibr   r%   __file__r   
SCRIPT_DIRr)   r_   rj   r   NOMINATIM_UAcopyr.   r0   r+   r   rU   objectrL   r]   rb   rd   rg   rm   rt   rx   r~   r   r   rE   )r   s   @r$   <module>r      s   	  	  *  (^""
++
11.	 _   $&'T H	9<;0
B
=
 
" 
"rF   