+
    Si/                         R t ^ RIt^ RIt^ RIHt ^ RIHt ^ RIHtH	t	H
t
HtHtHt . ROt. ROtR tR tR tR tR	 tR
 tR tR tR tR tR tR tR t]R8X  d
   ]! 4        R# R# )u  
FarmMatch Quality Report — Data completeness, score integrity, outcome metrics.

Three layers:
  1. Data Completeness Matrix — per-source field coverage
  2. Score Integrity Checks — hallucinated/blocked analysis, uniform scores
  3. Pipeline Outcome Metrics — score distribution, top-10 confidence

Usage:
    python3 quality_report.py                # Terminal summary (default)
    python3 quality_report.py --detail       # Per-property flags
    python3 quality_report.py --json         # Machine-readable output
N)Counter)Path)load	is_activedetect_source	get_score	short_urlSTATUS_REMOVEDc                j    \        V P                  RR4      4      pVR8w  d   VP                  4       # R# )z%Get display name for property source.url unknownOther)r   gettitle)propsrcs   & V/Users/jonathan/Documents/Zakelijk/ClaudeOS/03_Lab/farmmatch/scraper/quality_report.pyprop_sourcer      s/    
+
,C*399;77    c                    V P                  V4      pVf   R# \        V\        4      '       d   VP                  4       R8X  d   R# \        V\        4      '       d   \        V4      ^ 8X  d   R# R# )z6Check if a property has a non-empty value for a field.Fr   T)r   
isinstancestrstripdictlen)r   fieldvals   && r   	has_fieldr   "   sQ    
((5/C
{#s		r 1#tSQr   c                    V P                  R4      ;'       g    RP                  4       pV P                  R4      ;'       g    RpRV9   g   RV9   d   R# \        V4      ^28  d   R# R# )z0Check if GPT analysis is real (not bot-blocked).r   r   analysiszjust a momentzchecking your browserFT)r   lowerr   )r   r   r    s   &  r   is_analysis_okr"   .   s\    XXg$$"++-Exx
#))rH%#:e#C
8}rr   c           	        / pV  F  p\        V4      pW19  d   R^ R\         Uu/ uF  qD^ bK  	  upR^ /W&   W,          R;;,          ^,          uu&   \         F8  p\        W%4      '       g   K  W,          R,          V;;,          ^,          uu&   K:  	  \        V4      '       g   K  W,          R;;,          ^,          uu&   K  	  V# u upi )zPer-source field coverage.countfieldsanalysis_ok)r   COMPLETENESS_FIELDSr   r"   )props	by_sourcepr   fr   s   &     r   completeness_matrixr,   ;   s    I!n%q(CV4WCVaTCV4WYfhijINw1$(E""x(/14/ ) !N=)Q.)   5Xs   Cc                ,   . pV  F  p\        V4      '       g   VP                  VR,          4       K.  VP                  R4      '       d   KG  / '       d   KQ  VP                  R4      ;'       g    ^ ^ 8  g   Ks  VP                  VR,          4       K  	  V# )z6Flag properties where GPT analyzed a bot-blocked page.r   criteriaoverall_score)r"   appendr   )r(   blockedr*   s   &  r   detect_blocked_analysisr2   M   sn    Ga  NN1U8$%%
##rro0F0K0K!q/PNN1U8$	 
 Nr   c                   \        4       p/ pV  F  pVP                  R4      ;'       g    / pV'       g   K(  \        \        R VP	                  4        4       4      4      pV'       g   K[  W;;,          ^,          uu&   VP                  V. 4      P                  VR,          4       K  	  / pVP	                  4        F6  w  rWV^8  g   K  RVRW%,          R,          /V\        \        V4      4      &   K8  	  V# )zBFind criteria patterns that repeat 5+ times (likely hallucinated).r.   c              3   h   "   T F(  w  r\        V\        \        34      '       g   K#  W3x  K*  	  R # 5iN)r   intfloat).0kvs   &  r   	<genexpr>(detect_uniform_scores.<locals>.<genexpr>a   s)     b2B$!jQRUXZ_T`Favv2Bs    22r   r$   urlsN   N)	r   r   tuplesorteditems
setdefaultr0   r   r   )r(   patternsurl_by_patternr*   r.   pattern
suspiciousr$   s   &       r   detect_uniform_scoresrH   X   s    yHN55$**b(..2Bbbc7"%%gr299!E(C  J"..*A:/3.Js4=)* + r   c                   a . pV  Fs  o\        S4      pV^ 8  g   K  \        ;QJ d    V3R lR 4       F  '       g   K   RM	  RM! V3R lR 4       4      pV'       d   K[  VP                  SR,          4       Ku  	  V# )z<Properties with GPT scores > 0 but price/land/beds all null.c              3   <   <"   T F  p\        SV4      x  K  	  R # 5ir5   r   )r8   r+   r*   s   & r   r;   -detect_scores_without_data.<locals>.<genexpr>v   s     Y3Xa)Aq//3Xs   TFr   )priceland_size_m2bedrooms)r   anyr0   )r(   flaggedscorehas_anyr*   s   &   @r   detect_scores_without_datarT   p   s`    G!19cY3XYcccY3XYYG7qx(  Nr   c                    R^ R^ R^ R^ /pV  F}  p\        V4      pVR8  d   VR;;,          ^,          uu&   K,  VR8  d   VR;;,          ^,          uu&   KJ  VR8  d   VR;;,          ^,          uu&   Kh  VR;;,          ^,          uu&   K  	  V# )zScore distribution buckets.z4.0+z3.0-4.0z2.0-3.0z<2.0g      @g      @g       @r   )scored_propsbucketsr*   ss   &   r   score_histogramrZ   ~   s    q)Q	1fa@GaL8FOq O#XI!##XI!#FOq O  Nr   c                  a  . p\        V 3R l\         4       4      pV^8:  d   VP                  R4       S P                  R4      V9   d   VP                  R4       S P                  R4      '       g   VP                  R4       S P                  R4      '       g   VP                  R4       V'       g   R	V. 3# RV9   g   V^ 8X  d   R
W23# RW23# )z7Rate confidence in a property's score: high/medium/low.c              3   P   <"   T F  p\        SV4      '       g   K  ^x  K  	  R# 5i   NrK   r8   r+   r   s   & r   r;   $assess_confidence.<locals>.<genexpr>   s     @)1YtQ-?QQ)   &
&	thin_datar   blocked_analysisrM   no_pricelatno_coordinateshighlowmedium)sumCONFIDENCE_FIELDSr0   r   )r   blocked_urlsflagsdcs   f&  r   assess_confidencero      s    E	@)@	@B	Qw[!xx,&'(88GZ 88E??%&r2~U"bAgbRr   c                   \        V P                  4       4      pV Uu. uF  p\        V4      '       g   K  VNK  	  pp\        V4      \        V4      ,
          p\	        V4      p\        V4      p\        V4      p\        V4      pV Uu. uF  p\        V4      '       g   K  VNK  	  p	p\        V	4      p
\        V4      pR p\        WR7      R,          pV Uu. uF  q".\        W+4      O5NK  	  ppR\        V4      R\        V4      RVRVR\        V4      R	VR
VR\        V4      RVR\        V	4      RV
RV/# u upi u upi u upi )zGenerate full quality report.c                     \        V 4      ) # r5   rV   )xs   &r   <lambda>!generate_report.<locals>.<lambda>   s
    )A,r   )key:N
   Ntotalactiveremovedcompletenessblocked_countrl   uniform_patternsunverifiable_countunverifiable_urlsscored_count	histogramtop10)listvaluesr   r   r,   r2   rH   rT   r   rZ   setrA   ro   )storer(   r*   rx   ry   matrixr1   uniformunverifiablescoredr   blocked_setsort_keyr   
top10_confs   &              r   generate_reportr      s@    E/A)A,aaF/%j3v;&G !(F &f-G#F+G-f5L  0A9Q<aaF0'I g,K&H6(-ECHI5a9(895JI 	U#f+7WGc,/\FY + 0 1 Js   D9D9	D>!D>Ec                X   \        4        \        R4       \        RRK 24       \        RV R,           RV R,           RV R,           R	24       \        R
V R,           24       \        4        \        R4       \        RRL 24       RRR RRR RRR RRR RRR RRR RRR RRR 2p\        V4       \        RRL 24       R^ R\         Uu/ uF  q"^ bK  	  upR^ /p\        V R,          P                  4       4       EF  pV R,          V,          pVR,          pVR;;,          V,          uu&   VR;;,          VR,          ,          uu&   . p\         Fj  pVR,          V,          pVR,          P	                  V^ 4      V,           VR,          V&   TP                  V'       d   V^d,          V,          R R2MR4       Kl  	  V'       d   VR,          ^d,          V,          R R2MRp	\        RVR RVR RRP                  VR ,          4       RV	 24       EK  	  VR,          pV'       d   \        RRL 24       \         Uu. uF$  q#R,          V,          ^d,          V,          R R2NK&  	  ppVR,          ^d,          V,          R R2p	\        RR!R RVR RRP                  VR ,          4       RV	 24       \        4        \        R"4       \        RRL 24       V R#,          '       d   \        R$V R#,           R%24       V R&,          '       dJ   \        R' V R&,          P                  4        4       4      p
\        R$V
 R(\        V R&,          4       R)24       V R*,          '       d   \        R$V R*,           R+24       V R#,          '       g*   V R&,          '       g   V R*,          '       g   \        R,4       \        4        \        R-4       \        RRL 24       V R.,          '       d!   \        V R.,          P                  4       4      M^pV R.,          P                  4        F<  w  rRV^,          \        V^4      ,          ,          p\        RVR/ RVR0 RV 24       K>  	  \        4        \        R14       \        RRL 24       \        V R2,          4       F  w  pw  pppp\        V4      p\        VR3,          ^#4      pVR48X  d   R5M
VR68X  d   R7MR8pV'       d   R9R:P                  V4       R;2MR<p\        RV R=V^,           R> RVR? R@V RAV V 24       K  	  \        4        . pV R#,          '       d   VP                  RBV R#,           RC24       \        RD V R2,           4       4      pV'       d   VP                  V RE24       \        RF V R2,           4       4      pV'       d   VP                  V RG24       V'       dQ   \        RH4       \        RRL 24       \        V4       F  w  pp\        RV^,            RIV 24       K   	  \        4        \        RRK 24       RJ# u upi u upi )Mz Print terminal-friendly summary.z  QUALITY REPORT  z  Properties:  rw   z	 total | rx   z
 active | ry   z removedz  Scored:      r   z'  DATA COMPLETENESS (active properties)Sourcez<18 #z>4rM   z>6landre   bedscritGPTr$   r%   r&   rz   z>5%u       —r>   Totalz  SCORE INTEGRITYr{   z  !! z. properties with blocked/hallucinated analysisr|   c              3   2   "   T F  qR ,          x  K  	  R# 5i)r$   N )r8   r:   s   & r   r;    print_summary.<locals>.<genexpr>   s     T0S1gJJ0Ss   z properties share z repeated score patternsr}   z* properties scored without supporting dataz!  OK No integrity issues detectedz  SCORE DISTRIBUTIONr   z>8z<30z  TOP 10 CONFIDENCEr   r   rg   OKrh   z!!z??z  (, )r   z #z>2z.1fz  data=z/5  zRe-analyze z= blocked properties after enrich_leggett scrapes real contentc              3   >   "   T F  w  rr4R V9   g   K  ^x  K  	  R# 5i)rd   Nr   r8   r*   cdr+   s   &    r   r;   r     s     Jq*/11   
u=    top-10 properties missing price — manual check recommendedc              3   >   "   T F  w  rr4VR 8X  g   K  ^x  K  	  R# 5i)rh   Nr   r   s   &    r   r;   r     s     Eq!u*11r   uC    top-10 properties have LOW confidence — verify before presentingz  RECOMMENDED ACTIONSz. N<============================================================u   ────────────────────────────────────────────────────────────)printr'   rA   keysr   r0   joinrj   r   r   maxrB   	enumerater   r   )reportheaderr+   totalsr   datanpctsr:   gpt_pcttotal_uniform	max_countbucketr$   barir   confrn   rm   rR   	url_shorticonflag_stractionsrd   low_confactions   &                           r   print_summaryr      s   	G		Bxj/	OF7O,IfX6F5GzRXYbRcQddl
mn	OF>23
45	G 
35	Bzl
(3qR72,ar{!E":QvVXkYZ[abdZeefglmofpqF	&M	Bzl
q(3F$G3FaT3F$GXYZFf^,1134n%c*Mw1}m!44$AXq!A"("2"6"6q!"<q"@F8QKKQ1S5!8B-q)I> % :;T-(,a/315	3s)1QrF"TYYtBx%8$9G9EF 5 	wA:, =PQ=P(#A&s*A-b13=PQM*3.1"5Q773-q2b48)<(=RyIJ	G 
	Bzl
of_-..\]^ !!T7I0J0Q0Q0STTm_$6s6BT;U7V6WWopq"##f1233]^_/""62D+E+EfUiNjNj13	G 
 "	Bzl
5;K5H5HF;'..01aI,224URZ3y!#4456"+RCy5'23 5 
G 
!	Bzl
&/w&@""D$E$d5k2.	v~t45=4d05S5)*!,24&1Q3r("U3Kwrd$yk(TU 'A 
G GoVO%<$==z{|JvgJJH(#`abEvgEEH(#fgh%':, "7+IAvBqse2fX&' ,	Bxj/U %H" Rs   X"
*X'c                  a \        V R,          4      p\        V R,          4      p\        R4       \        RR 24       \        VP                  4       4       EF  w  po\	        S4      '       g   K  . pWB9   d   VP                  R4       WC9   d   VP                  R4       SP                  R4      '       g   VP                  R4       SP                  R	4      '       g   VP                  R
4       \        V3R l\         4       4      pV^8:  d   VP                  RV R24       V'       g   K  \        V^24      p\        RRP                  V4      R RV 24       EK	  	  R# )zPrint per-property flags.rl   r~   z
  PROPERTY DETAIL FLAGSr   BLOCKEDUNVERIFIABLErM   rd   re   	no_coordsc              3   P   <"   T F  p\        SV4      '       g   K  ^x  K  	  R# 5ir]   rK   r_   s   & r   r;   print_detail.<locals>.<genexpr>=  s     D-q41C-ra   zdata=z/5r   30sNr   )r   r   rA   rB   r   r0   r   rj   rk   r   r   )	r   r   r   unverifiable_setr   rm   rn   r   r   s	   &&      @r   print_detailr   )  s   f^,-K6"567	%'	Bxj/EKKM*	TLL#"LL(xx  LL$xxLL%D-DD7LL5B(5!#r*IBtyy',Byk:;# +r   c                  n   \         P                  ! R R7      p V P                  RRRR7       V P                  RRRR7       V P                  4       p\	        4       pV'       g   \        R4       R	# \        V4      pVP                  '       dm   / VCpVR
,           UUUUu. uF"  w  rVrxRVR,          R\        V4      RVRVRV/NK$  	  uppppVR
&   VR VR \        \        P                  ! V^R7      4       R	# VP                  '       d   \        V4       \        W24       R	# \        V4       R	# u uppppi )zFarmMatch Quality Report)descriptionz--detail
store_truezShow per-property flags)r   helpz--jsonzMachine-readable JSON outputz-No properties in store. Run migrate.py first.Nr   r   rR   
confidencedata_completenessrm   rl   r~   )indent)argparseArgumentParseradd_argument
parse_argsr   r   r   jsonr   dumpsdetailr   r   )	parserargsr   r   outputr*   r   rn   rm   s	            r   mainr   E  s   $$1KLF

<>WX
<Z[DFE=>U#FyyyF '-Wo
 '6" AeHgy|4!4b'5J&5
w
 >"&'djj*+	fV#f
s   (D/
__main__)rM   rN   re   rO   r.   r   )rM   rN   re   rO   building_size_m2)__doc__r   r   collectionsr   pathlibr   r   r   r   r   r   r   r	   r'   rk   r   r   r"   r,   r2   rH   rT   rZ   ro   r   r   r   r   __name__r   r   r   <module>r      s        V V X T 8	$0	 ,%PZz<8> zF r   