+
    Si`2                        R t ^ RIt^ RIt^ RIt^ RIt^ RIt^ RIt^ RIHt ^ RI	H	t	 ^ RI
Ht ^ RIHt ]! ]4      P                  t]! RRRR	R
RRRR/3RRRR	R
RRRR/3RRRR	RRR
RR/3RRRR	RRR
RR/3RRRR	RRRRR/3RRRR	RRR
RR/3RRRR	RRR
RR/3RRRR	RRR
RR/3RRRR	RRR
RR/3RRRR	RRR
RR/3R RR!R	RRR
RR/3R"RR#R	RRR
RR/3R$RR%R	RRR
RR
/3R&RR'R	RRR
RR/3R(RR)R	RRR
RR/3.4      tR* t]! 4       tR+ tR?R, ltR@R- ltRAR. ltRAR/ ltRAR0 ltRAR1 ltRAR2 ltRAR3 ltRAR4 ltRAR5 ltRAR6 ltRAR7 lt RAR8 lt!RAR9 lt"RAR: lt#RAR; lt$RAR< lt%R]$R]R]R]R]R]R]R]R]R]R ] R"]!R$]"R&]#R(]%/t&R= t'](R>8X  d
   ]'! 4        R# R# )BuK  
FarmMatch Pipeline — Unified property analysis from all sources to scored shortlist.

Usage:
    python3 pipeline.py                    # Default steps (skip interactive)
    python3 pipeline.py --all              # All steps (includes search + import)
    python3 pipeline.py --from check       # Start from step
    python3 pipeline.py --only score       # Single step
    python3 pipeline.py --only enrich,score # Multiple specific steps
    python3 pipeline.py --dry-run          # Show plan, don't execute
    python3 pipeline.py --force            # Pass --force to sub-scripts
N)OrderedDict)datetime)Path)STATUS_REMOVEDsearchnamezSearch PlatformsinteractiveTdefaultFcriticalimportzImport SourcescheckzAvailability CheckextractzExtract GPS/KPIsanalyzezGPT Analysis (old)customzCustom CriteriaenrichzEnrich DatamigratezMigrate to StoreleggettzEnrich LeggettgeocodezEnrich GeocodepagedatazEnrich Page DataapiszEnrich APIsgptzGPT AnalysisscorezScore & ShortlistqualityzQuality Reportc                      R FK  p \         V ,          pVP                  4       '       g   K(  VP                  4       '       g   K@  \        V4      u # 	  \        P
                  ! R4      ;'       g    R# )../venv/bin/python3.14python3)r   z../venv/bin/python3)
SCRIPT_DIRexistsis_filestrshutilwhich)	candidateps     P/Users/jonathan/Documents/Zakelijk/ClaudeOS/03_Lab/farmmatch/scraper/pipeline.pyfind_pythonr%   2   sN    F	"88::!))++q6M G <<	"//i/    c                 (    \        R V  R V 24       R# )  N)print)iconmsgs   &&r$   logr,   =   s    	BtfBse
r&   c                V   \         \        \        V ,          4      .T;'       g    . ,           p \        P                  ! V\        \        4      RRRR7      pVP
                  ^ 8w  d>   VP                  ;'       g    RP                  4       RR pRV  RVP
                   RV 23# VP                  ;'       g    RP                  4       P                  R	4      pV'       d   VR,          R
,          MRpRV3#   \        P                   d    RT  R23u # \         d    RT  R23u # i ; i)zERun a Python script as subprocess. Returns (success, output_summary).T  )cwdcapture_outputtexttimeout NFz failed (exit z): 
:Nd   Ndonez timed out (1h)
 not foundi8)PYr   r   
subprocessrun
returncodestderrstripstdoutsplitTimeoutExpiredFileNotFoundError)scriptargsr
   cmdresultr=   linessummarys   &&&     r$   
run_scriptrI   @   s   s:&'
(DJJB
7C,Z%)49!mm))r002459FVHN63D3D2ESQQQ$$"++-33D9%*%)D/W}$$ 1000 ,
+++,s6   AC6 ;)C6 %C6 8'C6  C6 6D(D(D('D(c                F   \         \        \        V ,          4      .T;'       g    . ,           p \        P                  ! V\        \        4      RR7      pVP
                  ^ 8H  RVP
                   23#   \        P                   d    Ru # \         d    RT  R23u # i ; i)z7Run a script with live stdout (for long-running steps).r.   )r/   r2   zexit Fr7   )Fztimed out (1h))r9   r   r   r:   r;   r<   rA   rB   )rC   rD   rE   rF   s   &&  r$   run_script_liverK   Q   s    s:&'
(DJJB
7C,Z$G  A%v/@/@.A'BBB$$ '&& ,
+++,s   AA3 3B B B B c                 (   . p\         R ,          pVP                  4       '       d.   \        RR4       \        R4      w  r4VP	                  RW434       MVP	                  R4       \         R,          pVP                  4       '       d.   \        RR4       \        R4      w  r4VP	                  R	W434       MVP	                  R4       R
P                  R V 4       4      p\        ;QJ d    R V 4       F  '       g   K   RM	  RM! R V 4       4      pWv3# )z	auth.json>zSyncing Properstar favorites...zsync_favorites.py
ProperstarFzfrench_favorites.csvzImporting Leggett favorites...zimport_french_favorites.pyLeggettz; c              3   H   "   T F  w  rq1 R V'       d   RMT 2x  K  	  R# 5i)z: okN ).0nrQ   r+   s   &   r$   	<genexpr>step_import.<locals>.<genexpr>s   s"     P*!#R45s    "c              3   *   "   T F	  w  rqx  K  	  R # 5iNrR   )rS   _rQ   s   &  r$   rU   rV   t   s     ,GqGs   T)rN   Fz!auth.json missing (login expired))rO   Fzfrench_favorites.csv not found)r   r   r,   rI   appendjoinany)forceresultsauthrQ   r+   
french_csvrH   any_oks   &       r$   step_importrb   ^   s    G #D{{}}C2301b./QR 44JC129:	2+,KLiiPPPGS,G,SSS,G,,F?r&   c                 L    V '       d   R .M. p\        RR4       \        RV4      # )--forcerM   zRunning availability check...zcheck_availability.pyr,   rK   r]   rD   s   & r$   
step_checkrg   x   s&    I;RD,-2D99r&   c                    . R
Op. pV Fj  p\         V,          P                  4       '       g   VP                  VRR34       K:  \        RRV R24       \	        V4      w  rEVP                  W4V34       Kl  	  V UUUu. uF  w  r4qd'       d   K  VNK  	  ppppV'       d   RRRP                  V4       23# R\        V4       R	23# u upppi )extract_breadcrumbs.pyFz	not foundrM   zRunning z...zFailed: , Tz extract scripts completed)ri   zextract_gps_and_kpis.pyzgeocode_with_breadcrumbs.py)r   r   rZ   r,   rI   r[   len)r]   scriptsr^   srQ   r+   rY   faileds   &       r$   step_extractro   ~   s    bGGQ&&((NNAuk23C8A3c"#Q-s|$  !(2HA1raaF26!2 3444CL>!;<<< 3s   =CCc                 0    \        R R4       \        R4      # )rM   z?Running GPT analysis (this may take a while and costs money)...zanalyze_from_urls.pyre   r]   s   &r$   step_analyzerr      s    NO122r&   c                 0    \        R R4       \        R4      # )rM   z3Running custom criteria (climate, airport, soil)...zcustom_criteria.pyr,   rI   rq   s   &r$   step_customru      s    BC*++r&   c           
     ,   \         R,          p/ pVP                  4       '       d   \        VRR7      ;_uu_ 4       p\        P                  ! V4       FI  pVP                  R4      \        8X  g   K  R Uu/ uF  qUVP                  V4      bK  	  upW$R,          &   KK  	  RRR4       V'       d   \        RR\        V4       R	24       \        RR
4       \        R4      w  rgV'       g   RV3# V'       d   VP                  4       '       d   \        VRR7      ;_uu_ 4       p\        P                  ! V4      pRRR4       ^ p	X FB  pVP                  R4      V9   g   K  VP                  W$R,          ,          4       V	^,          p	KD  	  \        VRRR7      ;_uu_ 4       p\        P                  ! W^RR7       RRR4       RRV	 R\        V4       R23# R# u upi   + '       g   i     ELF; i  + '       g   i     L; i  + '       g   i     LR; i)z/Run parse_criteria.py with status preservation.zenriched_data.jsonzutf-8)encodingstatusurlNrM   zSaved z" removed statuses for preservationzRunning parse_criteria.py...zparse_criteria.pyFw)indentensure_asciiTzEnriched + restored /z removed statuses)rx   
removed_atremoval_reasonavailability_status_codeavailability_reasonavailability_last_checked)TEnriched)r   r   openjsonloadgetr   r,   rk   rI   updatedump)
r]   enrichedsavedfr#   krQ   r+   datarestoreds
   &         r$   step_enrichr      s   00H E(W--YYq\55?n46'76 89!%%({6'7EE(O " . vc%j\)KLM+,,-GBcz ""(W--99Q<D .AuuU|u$x)A  (C'22aIIdae< 3+H:Qs5zlBSTTT3'7 .--" .- 32s;   1G1G5G
G G0HGG-	0H 	H	c                 0    \        R R4       \        R4      # )rM   z(Migrating CSV+JSON to properties.json...z
migrate.pyrt   rq   s   &r$   step_migrater      s    78l##r&   c                 L    V '       d   R .M. p\        RR4       \        RV4      # )rd   rM   z3Enriching Leggett property metadata (Playwright)...zenrich_leggett.pyre   rf   s   & r$   step_leggettr      s&    I;RDBC.55r&   c                 0    \        R R4       \        R4      # )rM   z3Reverse-geocoding properties without departments...zenrich_geocode.pyre   rq   s   &r$   step_geocoder      s    BC.//r&   c                 L    V '       d   R .M. p\        RR4       \        RV4      # )rd   rM   z/Extracting structured page data (Playwright)...zenrich_page_data.pyre   rf   s   & r$   step_pagedatar      s&    I;RD>?0$77r&   c                 L    V '       d   R .M. p\        RR4       \        RV4      # )rd   rM   u.   Enriching with SoilGrids + Géorisques APIs...zenrich_apis.pyre   rf   s   & r$   	step_apisr      s&    I;RD=>+T22r&   c                 L    V '       d   R .M. p\        RR4       \        RV4      # )rd   rM   z0Running GPT analysis (store-based, JSON mode)...zanalyze_properties.pyre   rf   s   & r$   step_gptr      s&    I;RD?@2D99r&   c                 6    \        R R4       \        RRR.4      # )rM   z Running Cyber Prairie scoring...zcyber_prairie_score.pyz--top10re   rq   s   &r$   
step_scorer      s    /03gt_EEr&   c                 0    \        R R4       \        R4      # )rM   z4Searching platforms for new listings (Playwright)...zsearch_properties.pyre   rq   s   &r$   step_searchr      s    CD122r&   c                 0    \        R R4       \        R4      # )rM   zRunning quality report...zquality_report.pyre   rq   s   &r$   step_qualityr      s    ().//r&   c                  n   \         P                  ! R R7      p V P                  RRRR7       V P                  RRRR	R
7       V P                  RRRR7       V P                  RRRR7       V P                  RRRR7       V P                  4       p\	        \
        P                  4       4      pVP                  '       d   VP                  P                  R4       Uu. uF  q3P                  4       NK  	  ppV FV  pV\
        9  g   K  \        RV RRP                  V4       2\        P                  R7       \        P                  ! ^4       KX  	  TpMVP                  '       d   VP                  \
        9  dQ   \        RVP                   RRP                  V4       2\        P                  R7       \        P                  ! ^4       VP!                  VP                  4      pW&R pM?V Uu. uF3  p\
        V,          R,          '       g   VP"                  '       g   K1  VNK5  	  pp\$        P&                  ! 4       P)                  R4      p\        4        \        RV R\*         24       \        RR@ 24       \-        V4       Fc  w  r\
        V	,          p
W9   pV'       d   RV^,            R2MR pV
R!,          '       d   V'       d   R"MR#p\        RV R$V
R%,          R& V 24       Ke  	  \        RR@ 24       \        R'\/        V4       R(24       \        4        VP0                  '       d   \        R)4       R# . p\2        P2                  ! 4       pV EF[  p	\
        V	,          p
\	        \
        P                  4       4      P!                  V	4      ^,           p\/        \
        4      p\        RRA 24       \        R*V R+V R,V
R%,           24       \        RRA 24       \2        P2                  ! 4       p\4        V	,          ! VP6                  R-7      w  pp\2        P2                  ! 4       V,
          pV'       d   R.MR/pVP9                  WR%,          VVV34       V'       d   \;        R.V R0VR1 R224       M1\;        R3R4V R0VR1 R224       V
R5,          '       d   \;        R3R64        M\        4        EK^  	  \2        P2                  ! 4       V,
          p\        RR@ 24       \        R7VR1 R824       \        RR@ 24       V F4  w  p	ppppVR.8X  d   R.MR3p\        RV RVR& RVR9 R:VR;,           24       K6  	  V Uu. uF  pV^,          R.8w  g   K  VNK  	  ppV'       d1   \        R<\/        V4       R=24       \        P                  ! ^4       R# \        R>\/        V4       R?24       R# u upi u upi u upi )BzFarmMatch Pipeline Runner)descriptionz--all
store_truezInclude interactive import step)actionhelpz--from	from_stepSTEPzStart from this step)destmetavarr   z--onlyzSTEP[,STEP]zRun only these steps)r   r   z	--dry-runzShow plan without executingrd   zForce re-processing,zUnknown step: z. Available: rj   )fileNr	   z%Y-%m-%d %H:%Mz  FARMMATCH PIPELINE  |  z  |  Python: r(   []z   r   z (interactive)r3    r   25sz	  Steps: z to runz  --dry-run: nothing executedz  [r}   z] rq   OKFAILz (z.0fzs)z!!zFAILED: r
   u*   Critical step failed — stopping pipelinez  PIPELINE SUMMARY  |  zs totalz5.0fzs  :N2   Nz
  z step(s) failedz
  All z steps completedz<============================================================u   ────────────────────────────────────────────────────────────)argparseArgumentParseradd_argument
parse_argslistSTEPSkeysonlyr@   r>   r)   r[   sysr=   exitr   indexallr   nowstrftimer9   	enumeraterk   dry_runtime
STEP_FUNCSr]   rZ   r,   )parserrD   step_idsrm   selectedsteps_to_runidxr   istep_idinfowill_runmarkertagr^   t_startstep_numtotal_stepst0rQ   rH   elapsedrx   totalr   r*   rrn   s                               r$   mainr     s   $$1LMF
;\]
{FI_`
->TU
L?\]
	,=RSD EJJL!Hyyy'+yys';<';!GGI';<A~qctyy7J6KLSVS]S]^   	>>&N4>>"2-		(@S?TU\_\f\fgHHQKnnT^^,~#+O8auQx	/B/Bdhh8O ,,.
!
!"2
3C	G	%cU-t
<=	Bxj/)
W~*'1QqSEU"&}"5"5(6(!DL-cU34 * 
Bxj/	Ic,'(
01	G|||-. GiikGW~

%++G4q8%j:, H:Q{m2d6l^<=:, YY[ )

;G))+"fvwHI	GC=34	GC=;<JDFG1  6 IIK'!E	Bxj/	#E#;g
67	Bxj/3:/vw~t44&4*BwtnC~FG 4; !1AAaDDLaaF1S[M12W&678c = P~ 2s$   V(-V-V- V-9V2V2__main__)NFrX   )F))__doc__r   r   r    r:   r   r   collectionsr   r   pathlibr   storer   __file__parentr   r   r%   r9   r,   rI   rK   rb   rg   ro   rr   ru   r   r   r   r   r   r   r   r   r   r   r   r   __name__rR   r&   r$   <module>r      s       
  #    (^""
 	+tiQVXbdijk)tiQVXbdijk-uiQUXbdijk+uiQUXbdijk-yRWYcejkl*yRVYcejklyRVYcejkl+yRVYcejkl)yRVYcejkl6+=%TX[eglmn6--	SWZdfklm6==%TX[eglmn
6>=%TX[egklm6.=%TX[eglmn)yRVYcejkl 	(0 ],"	,4:="3
,
#L$
60
83:F
3
0 kkZ||kk|||
I	8Z|
(^9B zF r&   