+
    j8                       R t ^ RIHt ^ RIt^ RIt^ RIt^ RIHt ^ RIH	t	 ]! ]
4      P                  4       P                  P                  t]! ]4      ]P                  9  d#   ]P                  P!                  ^ ]! ]4      4       ^ RIHtHtHt RtRZR R llt/ R	R
bRRbRRbRRbRRbRRbRRbRRbRRbRRbRRbRR bR!R"bR#R$bR%R&bR'R(bR)R*bR+R,R-R.R/R0R1R2R3R4R5R6R7R8R9R:R;R<R=R>R?R@RARBRCRDRERF/CtRGt]P2                  ! RH4      t]P2                  ! RI4      t]P2                  ! RJ]P8                  4      tRK RL lt]P2                  ! RM4      t]P2                  ! RN4      t ]P2                  ! RO]P8                  4      t!]P2                  ! RP]P8                  4      t"]P2                  ! RQ]P8                  4      t#RR RS lt$ ! RT RU]4      t% ! RV RW]%4      t& ! RX RY]%4      t'R# )[u)  Email-driven property source ingestion.

Reads property-alert emails from Mail.app via osascript. Bypasses scraper
bot blocks because emails are pushed officially by the platforms after the
user sets up a saved search.

Supports (as of 2026-05-27):
- Leggett (notifications@hestia.leggett.fr) — daily ~10 properties/email
- Properstar (noreply@properstar.com) — saved-search updates

Why this matters: leggett.fr is Cloudflare-blocked for our scraper, but
the email alerts arrive freely with the same properties + a parseable REF
that maps to frenchestateagents.com URLs (verified — REF format identical
to what the legacy scraper used before being blocked).

Adding a new sender:
1. Set up a saved search on the platform (logged in, "save this search +
   email me alerts"). Verify alerts arrive.
2. Inspect a sample email body — find regex patterns for url/price/region.
3. Add a parse_<source>() function returning Iterator[PropertyHit].
4. Add a Source subclass mirroring the pattern below.
5. Register in sources/__init__.py.
6. Add to campaigns.yaml.
)annotationsN)Path)Iterator)PropertyHitSearchCriteriaSourceu  
on run argv
    set senderFilter to item 1 of argv
    set daysBack to (item 2 of argv) as integer
    set cutoffDate to (current date) - (daysBack * days)
    tell application "Mail"
        set targetAccount to first account whose name is "Gmail"
        set theBox to first mailbox of targetAccount whose name is "INBOX"
        set msgs to messages of theBox
        set results to ""
        set i to 1
        set total to count of msgs
        if total > 500 then set total to 500
        repeat while i ≤ total
            set m to item i of msgs
            try
                set theDate to date received of m
                if theDate < cutoffDate then exit repeat
                set s to (sender of m as string)
                if s contains senderFilter then
                    set results to results & "===EMAIL===" & return
                    set results to results & "SENDER: " & s & return
                    set results to results & "DATE: " & (theDate as string) & return
                    set results to results & "SUBJECT: " & (subject of m as string) & return
                    set results to results & "BODY:" & return
                    try
                        set results to results & (content of m as string) & return
                    end try
                end if
            end try
            set i to i + 1
        end repeat
        return results
    end tell
end run
c               $    V ^8  d   QhRRRRRR/# )   sender_filterstr	days_backintreturnz
list[dict] )formats   "\/Users/jonathan/Documents/Zakelijk/ClaudeOS/03_Lab/farmmatch/scraper/sources/email_ingest.py__annotate__r   T   s!     # # # #Z #    c           	     F    \         P                  ! RR\        V \        V4      .RR^<R7      pTP
                  ^ 8w  d   . # TP                  p. pTP                  R4      pT EF  pTP                  4       '       g   K  / pTP                  R4      pRp	\        T4       F  w  rTP                  R4      '       d   T\        R4      R TR	&   K/  TP                  R
4      '       d   T\        R
4      R TR&   KY  TP                  R4      '       d   T\        R4      R TR&   K  TR8X  g   K  T
^,           p	 M	  T	^ 8  d   RP                  YR 4      TR&   TP                  R4      '       g   EK  TP                  T4       EK   	  T#   \         P                   d    . u # i ; i)zAReturn list of {sender, date, subject, body} for matching emails.	osascriptz-eT)capture_outputtexttimeoutz===EMAIL===

zSENDER: NsenderzDATE: datez	SUBJECT: subjectzBODY:body)
subprocessrunOSASCRIPT_QUERYr   TimeoutExpired
returncodestdoutsplitstrip	enumerate
startswithlenjoingetappend)r
   r   resultrawemailschunkschunkemaillines
body_startidxlines   &&          r   fetch_emailsr7   T   s~   $IOdB
 A	
--CFYY'F{{}}D!
"5)ICz**"&s:'7"8h** $S]^ 4f--#'K(8(9#:i  1W
 * ? IIeK&89E&M99VMM% ' ( M9 $$ 	s   ,F F F u   CÔTES-D'ARMORzcotes-d-armorMORBIHANmorbihanu
   FINISTÈRE	finisterezILLE-ET-VILAINEzille-et-vilaineu   VENDÉEvendeeMAYENNEmayenneORNEorneMANCHEmancheAUDEaudeu   HÉRAULTheraultGARDgardu   DRÔMEdromeu   ARDÈCHEardecheCHARENTEcharentezCHARENTE-MARITIMEzcharente-maritimeDORDOGNEdordogneLOTlotCREUSEcreuseu   CORRÈZEcorrezeGERSgersINDREindreSARTHEsartheCANTALcantalAVEYRONaveyronu   LOZÈRElozereTARNtarnzTARN-ET-GARONNEztarn-et-garonneu   PYRÉNÉES-ATLANTIQUESzpyrenees-atlantiquesu   HAUTES-PYRÉNÉESzhautes-pyreneesu   PYRÉNÉES-ORIENTALESzpyrenees-orientaleszLOT-ET-GARONNEzlot-et-garonnezFhttps://www.frenchestateagents.com/french-property-for-sale/view/{ref}zREF\s*:\s*([A-Z0-9]+)u&   ([A-ZÀ-Ÿ'\- ]+?)\s+€([\d,]+)\s*FAIz(\d+)[\s-]bedc                    V ^8  d   QhRRRR/# r	   r2   dictr   Iterator[PropertyHit]r   )r   s   "r   r   r      s     :
 :
 :
"7 :
r   c              #  `  "   V P                  RR4      p\        \        P                  V4      4      p\	        V4       EF@  w  r4VP                  ^4      pVP                  4       pV^,           \        V4      8  d   W#^,           ,          P                  4       M
\        V4      pWV p\        P                  V4      p	V	'       g   K  V	P                  ^4      P                  4       p
 \        V	P                  ^4      P                  RR4      4      pYP                  4       R pTP                  R4       Uu. uFb  pTP                  4       '       g   K  TP                  4       R8w  g   K2  \        TP                  4       4      ^8  g   KR  TP                  4       NKd  	  ppT'       d   T^ ,          R,          MRpRp\         P                  T4      pT'       d    \        TP                  ^4      4      p\"        P                  YP%                  4       4      p\&        P)                  TR7      p\+        TR	T'       d
   TR
,          MRT 2TRTTRTRT
RTRT P                  R4      RT P                  R4      /R7      x  EKC  	  R#   \         d     EKV  i ; iu upi   \         d     Li ; i5i)z5Yield PropertyHits from one Leggett alert email body.r    ,Nr      ￼Ni,  N)refleggett_emailNP   NzLeggett FRleggett_refleggett_region_rawdescriptionemail_subjectr   
email_dater   )urlsourcetitlepricecountrysearch_regionbedroomsextra)r+   listRE_LEGGETT_REFfinditerr'   groupendr)   startRE_LEGGETT_PRICEsearchr&   r   replace
ValueErrorr%   RE_LEGGETT_BEDS
DEPT_NAMESlowerLEGGETT_URL_TEMPLATEr   r   )r2   r   ref_matchesimrh   block_start	block_endblockpm
region_rawru   afterln
candidatesro   rx   bmrw   rr   s   &                   r   parse_leggettr      sR    99VR D~..t45K+&ggajeeg23a%#k:J2JKA&,,.PSTXPY	+ $$U+XXa[&&(
	++C45E ffhi !&T!2
!22xxz  hhjE1 69"((*o6J BHHJ!2 	 
 .8jmD)R ##K0rxx{+ #z3C3C3EF"))c)2"&1+c"#7G's$j{9!5eii/
 	
O '  		
  sm   C$J.'*J%J.6JJ*J
J:J.J4BJ.JJ.JJ.J+(J.*J++J.z*https://www\.properstar\.\w+/[^\s"\'<>()]+zEUR\s*([\d.,]+)z(\d+)\s*Kamers?z(\d+)\s*Bed\.?z(\d+)\s*Bad\.?c                    V ^8  d   QhRRRR/# r`   r   )r   s   "r   r   r      s     A
 A
D A
%: A
r   c              #  ~  a"   V P                  RR4      p\        P                  V4      p\        V4       Uau. uFG  o\        ;QJ d    V3R lR 4       F  '       g   K   RM	  RM! V3R lR 4       4      '       g   KE  SNKI  	  ppV EF  pVP                  V4      pV\        ^ VR,
          4      V^,            pRp\        P                  V4      p	V	'       d<    \        V	P                  ^4      P                  RR4      P                  R	R4      4      pRp
\        P                  V4      pV'       d    \        VP                  ^4      4      p
Rp\        P                  V4      pV'       d    \        VP                  ^4      4      pVP                  R
4      pV Uu. uFk  pVP!                  4       '       g   K  VP!                  4       R8w  g   K2  \#        VP!                  4       4      ^8  g   KR  RV9  g   K[  VP!                  4       NKm  	  ppV'       d   V^ ,          R,          MRp\%        TRV'       d
   VR,          MRVRV
VRVRV P                  R4      RV P                  R4      /R7      x  EK  	  R# u upi   \         d     ELzi ; i  \         d     ELRi ; i  \         d     EL*i ; iu upi 5i)zYield PropertyHits from one Properstar alert email body.

Properstar emails contain 1-N listing blocks; each has a properstar.* URL,
a description line, a room/bed/bath/type line, and EUR price.
r   rd   c              3  ,   <"   T F	  qS9   x  K  	  R # 5iNr   ).0markerus   & r   	<genexpr>#parse_properstar.<locals>.<genexpr>   s     _)^v{)^s   TFi   N.re   r   rf   EURrg   properstar_emailrj   zProperstar listingrl   ro   rp   r   rq   r   )rr   rs   rt   ru   rv   rx   roomsry   )z	/listing/z/annoncez
/property/z
/eigendom/)r+   RE_PROPERSTAR_URLfindallsetanyfindmaxRE_PROPERSTAR_PRICEr   r   r}   r   r   RE_PROPERSTAR_BEDSRE_PROPERSTAR_ROOMSr%   r&   r)   r   )r2   r   urlsr   listing_urlsrr   r5   nearbyru   r   bedsr   r   rmr3   r   
desc_linesro   s   &  `              r   parse_properstarr      so     99VR D$$T*D t9a3_)^_333_)^__ 	
9  
 iinc!S3Y's3 ''/BHHQK//R8@@bIJ &&v.288A;'  ''/BHHQK(
 T"!&
!&2xxz  hhjE1 69"((*o6J OT\^ BHHJ 	 
 .8jmD)R%&1+c"7K{9!5eii/
 	
M        

s   6J=I=	I= I= I=AJ=:JJ=6JJ=0J&
J=J8<J8J83J8<J8A2J=JJ=JJ=J#J="J##J=&J51J=4J55J=c                  F    ] tR tRtRtRtR tR R ltR tRR	 R
 llt	Rt
R# )_EmailSourceBasei;  u  Shared logic: read once per orchestrator run, then serve from cache.

The orchestrator calls search() once per (region × source × campaign).
For email sources the data isn't region-specific (emails come pre-
filtered by the user's saved searches), so we parse once on the first
call and serve cached PropertyHits on subsequent calls, with downstream
criteria filters applied per call.
Fc                	    R V n         R # r   _cacheselfs   &r   __init___EmailSourceBase.__init__F  s	    04r   c                   V ^8  d   QhRR/# r	   r   zlist[PropertyHit]r   )r   s   "r   r   _EmailSourceBase.__annotate__I  s     " "( "r   c                	    \         hr   )NotImplementedErrorr   s   &r   _load_EmailSourceBase._loadI  s    !!r   c                	P    V P                   f   V P                  4       V n         R # R # r   )r   r   r   s   &r   _ensure_cache_EmailSourceBase._ensure_cacheL  s    ;;**,DK r   Nc               $    V ^8  d   QhRRRRRR/# )r	   criteriar   
known_urlszset[str] | Noner   rb   r   )r   s   "r   r   r   P  s#      ~ *6Kr   c              #  	  "   VP                   V P                  9  d   R # V P                  4        T;'       g    \        4       pV P                   EF  pVP
                  V9   d   K  VP                  '       d0   VP                  '       d   VP                  VP                  8w  d   KX  VP                  '       d(   VP                  ;'       g    ^ VP                  8  d   K  VP                  '       d0   VP                  '       d   VP                  VP                  8  d   K  VP                  '       d)   VP                  ;'       g    ^ VP                  8  d   EK  Vx  EK  	  R # 5ir   )rv   	countriesr   r   r   rr   regionrw   	min_priceru   	max_pricemin_bedroomsrx   )r   r   r   knownhits   &&&  r   r   _EmailSourceBase.searchP  s     4>>1##ce;;Cww%3#4#4#49J9Jhoo9]!!!syy~~A9K9K&K!!!ciiiCII@R@R4R$$$#,,*;*;!x?T?T)TI s:   6E'?E'9E'.E':E'%E'3E'.E'4E' E'r   r   )__name__
__module____qualname____firstlineno____doc__requires_authr   r   r   r   __static_attributes__r   r   r   r   r   ;  s'     M5"' r   r   c                  8    ] tR tRtRtR.tR R ltR R ltRtR	# )
LeggettEmailSourceid  ri   rl   c                   V ^8  d   QhRR/# r	   r   ztuple[bool, str]r   )r   s   "r   r   LeggettEmailSource.__annotate__h       4 4( 4r   c                	     \        R ^R7      pT'       g   R# R\	        T4       R23#   \         d#   pR\        T4      P                   3u Rp?# Rp?ii ; i)leggettr   FNT emails in 30d)Fz+no emails in 30d (subscribe at leggett.fr?)r7   	Exceptiontyper   r)   r   r/   es   &  r   healthLeggettEmailSource.healthh  s^    	0!)r:F GGF}N333	  	0T!W--.//	0   ) AAAAc                   V ^8  d   QhRR/# r   r   )r   s   "r   r   r   q        ( r   c                	j    \        R ^R7      p. pV F  pVP                  \        V4      4       K  	  V# )r   r   )r7   extendr   r   r/   hitsr   s   &   r   r   LeggettEmailSource._loadq  s3    i26"$AKKa() r   r   N	r   r   r   r   namer   r   r   r   r   r   r   r   r   d  s    DI4 r   r   c                  :    ] tR tRtRt. R	OtR R ltR R ltRtR# )
ProperstarEmailSourceiy  r   c                   V ^8  d   QhRR/# r   r   )r   s   "r   r   "ProperstarEmailSource.__annotate__}  r   r   c                	     \        R ^R7      pT'       g   R# R\	        T4       R23#   \         d#   pR\        T4      P                   3u Rp?# Rp?ii ; i)properstar.comr   FNTr   )Fz<no emails in 30d (set saved-search alerts on properstar.nl?)r   r   s   &  r   r   ProperstarEmailSource.health}  s_    	0!"2bAF XXF}N333	  	0T!W--.//	0r   c                   V ^8  d   QhRR/# r   r   )r   s   "r   r   r     r   r   c                	j    \        R ^R7      p. pV F  pVP                  \        V4      4       K  	  V# )r   r   )r7   r   r   r   s   &   r   r   ProperstarEmailSource._load  s5    ."="$AKK(+, r   r   N)rl   ITESPTr   r   r   r   r   r   y  s    D(I4 r   r   )   )(r   
__future__r   rer   syspathlibr   typingr   __file__resolveparent
SCRIPT_DIRr   pathinsertsources._baser   r   r   r!   r7   r   r   compiler{   r   
IGNORECASEr   r   r   r   r   r   RE_PROPERSTAR_BATHSr   r   r   r   r   r   r   <module>r     sr  0 # 	  
  (^##%,,33
z?#(("HHOOAs:' = =#L#V!o!
! +! (	!
 x! y! F! h! F! 	! F! g! 	! 
! ,!  
!!" 
5#!$ h	
FWhhyx
F(4*2&A!
J `  45::GH **-r}}=:
~ JJLM jj!34 jj!3R]]C ZZ 12==A jj!2BMMB A
L&v &R) *, r   