+
    hW                        R t ^ RIt^ RIt^ RIt^ RIHt ^ RIHt ^ RIHt ^ RI	t
RR ltRR ltRR ltRR	 ltR
 t]R8X  Ed#   ^ RIt]! R4       ]! R4       ]! R4       ]! ]P(                  4      ^8  d"   ]P(                  ^,          R8X  d
   ]! 4        R# ]! ]P(                  4      ^8  d"   ]P(                  ^,          R8X  d
   ]! 4        R# ]! ]P(                  4      ^8  d$   ]P(                  ^,          R8X  d   ]! RR7       R# ]! R^R7       ]! R4       ]! R4       ]! R4       ]! R4       ]! R4       ]! R4       ]! R4       ]! R4       ]! R4       ]! R4       R# R# ) zy
Property Availability Checker
Checks if properties are still available/published and marks unavailable ones for removal
NPath)datetime)BeautifulSoupc           
     R   RR/p \         P                  ! WVRR7      pVP                  pVR8X  d,   RRRRR	R
R\        P                  ! 4       P                  4       /# VR8X  d,   RRRRR	RR\        P                  ! 4       P                  4       /# VR8X  d,   RRRRR	RR\        P                  ! 4       P                  4       /# VR8  d0   RRRVR	RV R2R\        P                  ! 4       P                  4       /# RVP                  9   g"   VP                  P                  R4      '       d,   RRRVR	RR\        P                  ! 4       P                  4       /# \        VP                  R4      pVP                  4       P                  4       p. R%OpV F:  pW9   g   K  RRRVR	RV R2R\        P                  ! 4       P                  4       /u # 	  VP                  R4      ;'       g    VP                  R4      p	RV9   ;'       g    RV9   ;'       g    RV9   p
V	'       g,   RRRVR	RR\        P                  ! 4       P                  4       /# RRRVR	RR\        P                  ! 4       P                  4       /#   \         P                  P                   d/    RRR^ R	R R\        P                  ! 4       P                  4       /u # \         P                  P                   dJ   pRRR^ R	R!\!        T4      R",           R#2R\        P                  ! 4       P                  4       /u R$p?# R$p?ii ; i)&a  
Check if a property is still available

Returns:
    dict with:
    - available (bool): True if property is still active
    - status_code (int): HTTP status code
    - reason (str): Reason for unavailability
    - checked_at (str): ISO timestamp of check
z
User-AgentzuMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36T)headerstimeoutallow_redirectsi  	availableFstatus_codereasonzPage not found (404)
checked_ati  z'Property permanently removed (410 Gone)i  z'Access forbidden (403) - likely removedi  zServer error (z) - assuming availablesearch/z0Redirected to search/homepage (property removed)zhtml.parserz)Page contains unavailability indicator: ""h1titlepriceu   €euroz%Missing essential elements (no title)zProperty appears to be activezTimeout - assuming availablezConnection error: :N2   Nz - assuming availableN)
znot availablezniet beschikbaarzno longer availablezniet meer beschikbaarzlisting removed
verwijderdsoldverkochtzunder offerzin optie)requestsgetr   r   now	isoformaturlendswithr   contentget_textlowerfind
exceptionsTimeoutRequestExceptionstr)r   r   r   responser   soup	page_textunavailable_indicators	indicator	has_title	has_pricees   &&          [/Users/jonathan/SynologyDrive/Since Today/PROJECTEN/farmmatch/scraper/check_availability.pycheck_property_availabilityr0      s4    	  NGl
<<gW[\** #Us0hlln668	  #UsChlln668	  #UsChlln668	  #T{N;-7MNhlln668	  x||#x||'<'<S'A'AU{Lhlln668	  X--}= MMO))+	"
 0I%!; I)TUV (,,.":":"<	  0 IIdO99tyy'9	y(UUEY,>UU&IBU	U{Ahlln668	  ;5(,,.224	
 	
 && 
14(,,.224	
 	
 // 
1*3q6#;-7LM(,,.224	
 	

sz   AI3 1I3 1I3 5I3 71I3 )*I3 A I3 0I3 
I3 $I3 I3 I3 *I3 *I3 3A
L&?L&L&>L!L&!L&Tc                v   \        R4      pVP                  4       '       g   \        R4       R# \        R4       \        RQ4       \        R\        P                  ! 4       P                  R4       24       \        RV  RV R	24       \        RQ4       \        VR
RR7      ;_uu_ 4       p\        P                  ! V4      pRRR4       \        R4      p\        4       pVP                  4       '       d`    \        VR
RR7      ;_uu_ 4       p\        \        P                  ! V4      4      pRRR4       V'       d   \        R\        V4       R24       \        X4      pV U	u. uF  qP                  R4      V9  g   K  V	NK  	  pp	V\        V4      ,
          p
V
^ 8  dS   \        RV
 R24       \        VRRR7      ;_uu_ 4       p\        P                  ! WC^RR7       RRR4       \        R4       \        V4      p^ p^ p^ p^ p^ p\        RV 24       \        V^4       EFJ  w  ppVP                  RR4      pVP                  RR4      pVP                  R4      pVR8X  d   V^,          pKN  V '       d   V'       d    \        P                  ! V4      p\        P                  ! 4       V,
          P!                  4       R,          pVV8  d/   V^,          pV^,          ^ 8X  d   \        RV R V R!V R"24       K   VP                  R#R$4      p\        R%V R V R&V 24       \        R'V 24       \        R(V 24       \        R)4       \#        V4      pV^,          pVR*,          VR&   VR+,          VR,&   VR-,          VR.&   VR/,          '       d    V^,          p\        R0VR-,           24       MFV^,          pRVR&   VR*,          VR1&   VR-,          VR2&   \        R3VR-,           24       \        R44       V^
,          ^ 8X  dH   \        VRRR7      ;_uu_ 4       p\        P                  ! WC^RR7       RRR4       \        R5V R624       VV8  g   EK4  \$        P&                  ! ^4       EKM  	  \        RR4       \        R74       \        VRRR7      ;_uu_ 4       p\        P                  ! WC^RR7       RRR4       \        RR4       \        R84       \        RQ4       \        R9V 24       \        R:V 24       \        R;V 24       \        R<V 24       \        R=V 24       \        R>V 24       V^ 8  d&   \        R?V R@24       \        RA4       \)        V4       \        RB4       RC\        P                  ! 4       P+                  4       RDVREVRFVRGVRHVRIVRJV\-        ^V4      ,          ^d,          RK RL2/p\        RMRRR7      ;_uu_ 4       p\        P                  ! VV^RN7       RRR4       \        RO4       \        RPVRJ,           24       R#   + '       g   i     ELZ; i  + '       g   i     EL; i  \         d   p\        RT 24        Rp?ELRp?ii ; iu up	i   + '       g   i     EL; i    EL; i  + '       g   i     ELX; i  + '       g   i     EL; i  + '       g   i     L; i)Sz
Check availability of all properties in enriched_data.json

Args:
    skip_recently_checked: Skip properties checked within recent_threshold_hours
    recent_threshold_hours: Hours to consider a check "recent"
enriched_data.json!   ❌ enriched_data.json not found!Nu"   🔍 Property Availability CheckerzTimestamp: z%Y-%m-%d %H:%M:%SzSkip recently checked: z	 (within zh)rutf-8encodingzremoved_properties.jsonu   
🚫 Loaded z- manually removed URLs (will be filtered out)u0   ⚠️  Could not load removed_properties.json: r   u   🗑️  Filtered out z manually removed propertieswFindentensure_asciiu;      ✅ Saved filtered data (removed blacklisted properties)u   
📊 Total properties:  statusActiveavailability_last_checkedRemovedi  z   [r   z
] Skipped z recently checked properties...locationUnknown
[]    URL: z   Current status: u       🔍 Checking availability...r   r   availability_status_coder   availability_reasonr
   u      ✅ Available - 
removed_atremoval_reasonu      ❌ UNAVAILABLE - u      📝 Marked as 'Removed'u   
   💾 Progress saved (z	 checked)u   💾 Saving final results...u   📊 AVAILABILITY CHECK SUMMARYzTotal properties: u   ✅ Still available: u   ❌ Newly unavailable: u$   ⚠️  Already marked unavailable: u$   ⏭️  Skipped (recently checked): u   🔍 Actually checked: u
   
🗑️   properties marked as 'Removed'zO   They will be filtered out in the map viewer when 'Show Removed' is uncheckedu!   
✨ Availability check complete!	timestamptotal_propertiescheckedskippedstill_availablenewly_unavailablealready_unavailableavailability_ratez.1f%zavailability_check_report.json)r:   u5   
📄 Report saved to: availability_check_report.jsonz   Availability rate: F======================================================================G
======================================================================)r   existsprintr   r   strftimeopenjsonloadsetlen	Exceptionr   dump	enumeratefromisoformattotal_secondsr0   timesleepunfavorite_removed_propertiesr   max)skip_recently_checkedrecent_threshold_hoursenriched_filef
propertiesremoved_filemanually_removed_urlsr.   original_countpfiltered_counttotalrM   rN   rP   rO   rQ   ipropr   current_statuslast_checkedlast_check_timehours_since_checkrA   resultreports   &&                         r/   check_all_propertiesrz      s6    -.M!!12	
./	(O	K//0CDE
FG	#$9#:)DZC[[]
^_	(O 
mS7	3	3qYYq\
 
4 12LE	JlC'::a(+DIIaL(9% ;$s+@'A&BBopq
 _N'UZ55<?T+T!!ZJU#c*o5N&~&66RST-w771IIjAEB 8KM
OEGGO	%eW
-.Z+4hhub!(H5xx ;< Y&1$ !\	"*"8"8"F%-\\^o%E$T$T$VY]$]!$'==qLG2v{QCqz'Babc	 > 88J	2A3awb
+,#N#34502 -S11 -3<,@()+1-+@'(&,X&6"#+q O'x(8'9:;"&DN!'!5D%+H%5D!")&*:);<=/1 R<1mS7;;q		*F <.wiyAB u9JJqMs ,x 
/	
()	mS7	3	3q		*> 
4 
/	
+,	(O	ug
&'	!/!2
34	#$5#6
78	01D0E
FG	0	
:;	#G9
-.1-..MNO_` 	&j1	
./ 	X\\^--/E77?.23q'?!Bc I#NaP	F 
.g	F	F!		&!A& 
G 
BD	"6*=#>"?
@AS 
4	3	3 ;::  	JDQCHII	J
 V 877B< <;; 
4	3	3F 
G	Fs   (X3X8 	 X$)X8 9X8  Y<YY$=A:Y8Z ZZ(X!	$X5	/	X8 8YYY$Y5	8Y= ZZ%	(Z8	c           	     
   \        R4       \        R,4       \        R4      pVP                  4       '       g   \        R4       R#  \        P                  ! R4      pRVP
                  9  d   \        R4       \        R4       R#  \        Y"R,          R	8H  ,          R
,          P                  4       4      pT'       g   \        R4       R# \        R\        T4       R24       \        T4      R,           F  p\        RT 24       K  	  \        T4      ^8  d    \        R\        T4      ^,
           R24       T '       EdI   \        P                  ! 4       P                  R4      p\        R4      P                  4       '       d>   RT R2p\        P                  ! R4      pTP                  TRR7       \        RT 24       RT R2p	TP                  T	RR7       \        RT	 24       \        R4      P                  4       '       d   RT R2p
\        RRRR7      ;_uu_ 4       p\         P"                  ! T4      pRRR4       \        T
R RR7      ;_uu_ 4       p\         P$                  ! XT^RR!7       RRR4       \        RT
 24       \        R4      P                  4       '       dz   \        P                  ! R4      p\        T4      pYR
,          P'                  T4      ( ,          p\        T4      pTP                  RRR7       \        R"T R#T R$Y,
           R%24       \        T4      pY"R
,          P'                  T4      ( ,          p\        T4      pTP                  RRR7       \        R&T R#T R$Y,
           R%24       \        R4      P                  4       '       d   \        RRRR7      ;_uu_ 4       p\         P"                  ! T4      pRRR4       \        X4      pT Uu. uF  qP)                  R'4      T9  g   K  TNK  	  pp\        T4      p\        RR RR7      ;_uu_ 4       p\         P$                  ! Y^RR!7       RRR4       \        R(T R#T R$Y,
           R%24       \        R)\        T4       R*24       \        R+4       R#   \         d   p\        RT 24        Rp?R# Rp?ii ; i  + '       g   i     EL; i  + '       g   i     EL_; i  + '       g   i     EL; iu upi   + '       g   i     L; i)-a	  
Remove properties that returned 404 (Page Not Found) based on Status_404 column
in extracted_property_urls.csv. Removes from:
- analysis_output.csv
- extracted_property_urls.csv
- enriched_data.json

Args:
    backup: Create backup before removing (default: True)
u"   🗑️  404 Property Removal Toolzextracted_property_urls.csvu*   ❌ extracted_property_urls.csv not found!N
Status_404uA   ℹ️  No Status_404 column found in extracted_property_urls.csvz=   Run: python3 extract_breadcrumbs.py to check for 404 pagesu/   ❌ Error reading extracted_property_urls.csv: TURLu   ✅ No 404 pages foundFound z properties with 404 status::N   Nz   - z   ... and z more%Y%m%d_%H%M%Szanalysis_output.csvanalysis_output_backup_z.csvF)index   ✅ Backup created: extracted_property_urls_backup_r2   enriched_data_backup_.jsonr4   r5   r6   r8   r9   u   
📊 analysis_output.csv:     → z (	 removed)u"   📊 extracted_property_urls.csv: r   u   📊 enriched_data.json:    
✅ z7 404 properties permanently removed from all data filesz?   These properties returned Page Not Found and no longer existrT   )rW   r   rV   pdread_csvcolumnsr^   r\   tolistr]   listr   r   rX   to_csvrY   rZ   r[   r_   isinr   )backupbreadcrumb_filedf_breadcrumbsr.   page_404_urlsr   rK   backup_analysisdf_analysisbackup_breadcrumbsbackup_enrichedrj   enriched_datarn   	new_countro   s   &               r/   remove_404_propertiesr   +  sn    

./	(O 89O!!##:;%BC~555UVQR 6 l'Ct'KLUSZZ\]M&'	F3}%&&B
CDM"2&&cUm '
=AC.2359: vLLN++O<	 %&--// 7	{$GO++&;<Ke<((9:;  ?ykN0>$%7$89: $%,,.. 5i[FO*C'BBa $		! CosW==		-15I >((9:; !"))++kk"78[)!u#5#:#:=#I"IJ$	0>,^,<E)B~OiNjjstu (N#E%:%?%?%N$NONN#I7uE	.~.>eI;bQ_QkPllu
vw  !((**&g>>! IIaLM ?]+$1WMqUU5\5VMW&	&g>>!IImquE ?).)9ykNLfKggpqr	F3}%&&]
^_	
KLM  ?sCDH CBB===, ?>> X>>sT   =R $R6S
:S(S2S2-S7R3R..R36S	
S	S/	7T	c           	        \        R4      pVP                  4       '       g   \        R4       R# \        R4       \        R!4       \        VRRR7      ;_uu_ 4       p\        P
                  ! V4      pRRR4       \        R X 4       4      pV^ 8X  d   \        R	4       R# \        R
V R24       V '       do   R\        P                  ! 4       P                  R4       R2p\        VRRR7      ;_uu_ 4       p\        P                  ! W2^RR7       RRR4       \        RV 24       V Uu. uF  qfP                  R4      R8w  g   K  VNK  	  pp\        R\        V4       R24       \        R\        V4       R24       \        RV R24       \        RV R24      P                  4       pVR8X  db   \        VRRR7      ;_uu_ 4       p\        P                  ! Wr^RR7       RRR4       \        RV R24       \        R\        V4       R24       R# \        R 4       R#   + '       g   i     EL; i  + '       g   i     EL,; iu upi   + '       g   i     Ls; i)"z
Permanently remove properties marked as 'Removed' from enriched_data.json

Args:
    backup: Create backup before removing (default: True)
r2   r3   Nu   🗑️  Property Removal Toolr4   r5   r6   c              3   T   "   T F  qP                  R 4      R8X  g   K  ^x  K   	  R# 5i)r=   r@   N)r   ).0ro   s   & r/   	<genexpr>0remove_unavailable_properties.<locals>.<genexpr>  s     N:axI1M:s   (
(u%   ✅ No properties marked as 'Removed'r~   rJ   r   r   r   r8   Fr9   r   r=   r@   u   
📊 Before: z propertiesu   📊 After:  u   🗑️  Removed: u   
⚠️  Permanently remove z properties? [y/N]: yr   z properties permanently removedu$   ✨ enriched_data.json now contains z active propertiesu   
❌ Removal cancelledrT   )r   rV   rW   rY   rZ   r[   sumr   r   rX   r_   r   r]   inputr!   )	r   ri   rj   rk   removed_countbackup_filero   active_propertiesconfirms	   &        r/   remove_unavailable_propertiesr     s    -.M!!12	
*+	(O 
mS7	3	3qYYq\
 
4 N:NNM56	F=/!@
AB -hlln.E.Eo.V-WW\]+sW55IIjAEB 6$[M23 %/OJq%%/Y2NJO	OC
O,K
89	M#/01
=>	}o[
9: 3M?BVWX^^`G#~-w771II'15I 8}o%DEF4S9J5K4LL^_`'(G 
4	3	3 655
 P 87s0   H5H2)IIIH/	2I	I	c           	     P   ^ RI p^ RIHp V  Uu. uF  q3P                  R4      R8X  g   K  VNK  	  ppV'       g   R# \	        R24       \	        R4       \	        R14       \	        R\        V4       R24       V! R4      pVP                  4       '       g   \	        R	4       \	        R
4       R# V! \        4      P                  R,          pVP                  4       '       g   \	        R4       R# \	        R4       ^ p^ p\        V^4       F  w  rV
P                  R4      pV
P                  RR4      p\	        RV	 R\        V4       RV 24       \	        RV 24        ^ RI
pVP                  R\        V4      V.RR^<R7      pVP                  ^ 8X  d   \	        R4       V^,          pM4\	        R4       \	        RVP                  R,           24       V^,          p V	\        V4      8  g   K  \"        P$                  ! ^4       K  	  \	        R24       \	        R4       \	        R14       \	        RV 24       \	        R V 24       \	        R!\        V4       24       \	        R"4       \	        R#4       V^ 8  Ed   \	        R24       \	        R$4       \	        R14       V! R%4      pVP                  4       '       d   \'        VR&R'R(7      ;_uu_ 4       p\(        P*                  ! V4      pRRR4       \        X4      pV Uu. uF  q3P                  R4      R8w  g   K  VNK  	  pp\        V4      p\'        VR)R'R(7      ;_uu_ 4       p\(        P,                  ! VV^R*R+7       RRR4       \	        R,4       \	        R-V R.V R/VV,
           R024       R# R# R# u upi   XP                   d    \	        R4       T^,          p EL\          d3   p\	        R\        T4      R,           24       T^,          p Rp?ELRp?ii ; i  + '       g   i     EL!; iu upi   + '       g   i     L; i)3z
Automatically unfavorite properties that are marked as 'Removed' on Properstar

Args:
    properties: List of property dictionaries
Nr   r=   r@   u-   🗑️  AUTO-UNFAVORITING REMOVED PROPERTIESr~   z/ removed properties to unfavorite on Properstarz	auth.jsonu6   ⚠️  auth.json not found - skipping auto-unfavoritezK   Login required: ./login.sh or double-click 'Login to Properstar.command'zunfavorite_property.pyuC   ⚠️  unfavorite_property.py not found - skipping auto-unfavoriteu(   
🔄 Unfavoriting removed properties...r   rA   rB   rC   r   rD   rE   python3T)capture_outputtextr   u      ✅ Unfavorited successfullyu      ❌ Failed to unfavoritez
   Error: :Nd   Nu      ⏱️  Timeout - skippingu      ❌ Error: u   📊 AUTO-UNFAVORITE SUMMARYu   ✅ Successfully unfavorited: u   ❌ Failed: u   📊 Total processed: u   
✨ Auto-unfavorite complete!z7   Removed properties are now unfavorited on Properstaru5   🗑️  PERMANENTLY REMOVING FROM enriched_data.jsonr2   r4   r5   r6   r8   Fr9   uB   ✅ Removed properties permanently deleted from enriched_data.jsonz   r   z properties (r   rT   rU   )asynciopathlibr   r   rW   r]   rV   __file__parentr`   
subprocessrunr&   
returncodestderrTimeoutExpiredr^   rc   rd   rY   rZ   r[   r_   )rk   r   r   ro   removed	auth_fileunfavorite_script	successesfailuresrr   rs   r   rA   r   rx   r.   ri   rj   all_propertiesrn   r   r   s   &                     r/   re   re     s     %E*Qh9(Dqq*GE	/	
9:	(O	F3w<. O
PQ [!IFG[\ X--0HH##%%ST	
56IHWa(hhuo88J	2A3aG~Rz23	^^C 12C8#	 $ F   A%79Q	35
6==#6"789A s7|JJqME )J 
/	
()	(O	*9+
67	L

#$	"3w<.
12	
+,	
CD 1}oEFh12!!mS7;;q!%1 < !0N -; [NqeeHoQZ>ZN [-.I mS7;;q		+QquM < VXC'uYK}^V_E_D``ijk! " W Fj (( 	13MH 	N3q6$<.12MH	6 <;; !\ <;sY   NNA
N&3NO<;PP P%O9=O9O9'O44O9<P	P%	__main__u,   🔍 FarmMatch Property Availability Checkerz--remove-404z--removez--forceF)rg   )rg   rh   u   💡 NEXT STEPS:z.1. Review removed properties in the map viewerz#2. To permanently delete them, run:z)   python3 check_availability.py --removez5
3. To remove 404 pages (from breadcrumb extraction):z-   python3 check_availability.py --remove-404z9
4. To force check all properties (ignore recent checks):z(   python3 check_availability.py --force)
   )T   )TrT   rU   )__doc__rZ   r   rc   r   r   r   bs4r   pandasr   r0   rz   r   r   re   __name__sysrW   r]   argv     r/   <module>r      sE         {
z^B@aMF4)lll\ z	/	
89	(O
388}qSXXa[N:	SXX	sxx{j8%'	SXX	sxx{i759 	4PRSo !h>?349:FG=>JK89; r   