
    iT                        d Z ddlZddlZddlZddlZddlmZ ej                  j                  d e	 ee
      j                  j                               ddlZddlmZ ddlmZmZ ddlmZmZ  ej*                  ej,                  d        ej.                  e      Z ej4                  d	      Zd
e	de	fdZdede	defdZdde dede fdZ!de ddfdZ"d Z#edk(  r ejH                   e#              yy)z
Re-optimize internal links for all published articles.
Strips old single-word links and re-injects using LLM-based anchor text.
Updates both database and live WordPress posts.
    N)Path)get_settings)execute_query
get_cursor)html_converterinternal_link_injectorz4%(asctime)s - %(name)s - %(levelname)s - %(message)s)levelformatz\[([^\]]+)\]\((/blog/[^\)]+)\)markdownreturnc                 .    t         j                  d|       S )z:Remove all internal /blog/ links, keeping the anchor text.z\1)INTERNAL_LINK_PATTERNsub)r   s    :/var/www/html/content-pipeline/scripts/reoptimize_links.pystrip_internal_linksr      s     $$UH55    
wp_post_idcontent_htmlc                 &   t               }|j                  j                  d       d|  }	 t        j                  ||j
                  d|id      }|j                          y# t        $ r%}t        j                  d|  d|        Y d	}~y
d	}~ww xY w)z+Update WordPress post content via REST API./z/wp-json/wp/v2/posts/content<   )authjsontimeoutTzFailed to update WP post : NF)
r   wp_urlrstriprequestspostwp_authraise_for_status	Exceptionloggererror)r   r   settingsapi_urlresponsees         r   update_wp_post_contentr*   $   s    ~H'',--B:,OG==!!\*	
 	!!# 0BqcBCs   5A" "	B+BBarticledry_runc                    | d   }| d   }| d   }| d   }| d   }| d   }| d   }t         j                  |      }	t        |	      }
|	D cg c]  \  }}|	 }}}t        |      }t	        j
                  |||||      \  }}t         j                  |      }|D cg c]  \  }}|	 }}}t        d	 |D              }|||d
d ||
||||ddd}|r|S t        j                  |      }	 t               5 }|j                  d|||f       d|d<   d
d
d
       t        ||      |d<   |S c c}}w c c}}w # 1 sw Y   &xY w# t        $ r%}t        j                  d| d|        Y d
}~Rd
}~ww xY w)zZ
    Re-optimize internal links for a single article.

    Returns dict with results.
    r   idtitle
word_counttarget_keywordscontent_clustercontent_markdown)r   topic_keywordsr2   exclude_wp_idr0   c              3   Z   K   | ]#  }t        |j                               d k\  s d % yw)      N)lensplit).0as     r   	<genexpr>z%reoptimize_article.<locals>.<genexpr>Y   s!     IS^q5H1Is   !++Nr   F)
article_idr   r/   r0   	old_links	new_links
multi_wordold_anchorsnew_anchors
db_updated
wp_updatedzSUPDATE parketry_articles SET content_markdown = %s, content_html = %s WHERE id = %sTrD   zDB update failed for article r   rE   )r   findallr9   r   r   inject_internal_linkssumr   convert_with_ctar   executer#   r$   r%   r*   )r+   r,   r   r>   r/   r0   keywordsclusterr   r?   old_link_countanchor_rB   strippednew_contentnew_link_countr@   rC   multi_word_countresultnew_htmlcursorr)   s                           r   reoptimize_articlerW   7   s    &JJGE&J()H'(G()G &--g6I^N+45ifa65K5 $G,H #9"N"N #K &--k:I+45ifa65K5IkII ! s ##&""F  ..{;HH\ 	(VNNeh
3 $(F< 	( 2*hGF<Mg 6  6.	( 	(  H4ZL1#FGGHs<   	D$D*&
D< 0D0D< 0D95D< <	E*E%%E*c                    t        j                  | d   | d   | d         }	 t               5 }|j                  d|| d   f       ddd       y# 1 sw Y   yxY w# t        $ r(}t
        j                  d| d    d	|        Y d}~yd}~ww xY w)
zRUpdate the published_content table with enhanced keywords including title phrases.r3   r1   r/   )r/   zNUPDATE parketry_published_content SET main_keywords = %s WHERE wp_post_id = %sr   Nz)Failed to update keywords for wp_post_id=r   )r   extract_linkable_keywordsr   rJ   r#   r$   r%   )r+   enhancedrV   r)   s       r   !update_published_content_keywordsr[   ~   s    %??"#!"gH
_\ 	VNN`7<01	 	 	
  _@AV@WWYZ[Y\]^^_s4   
A AA AA A 	B#BBc                     dt         j                  v } | rt        j                  d       t	        d      }|st        j                  d       yt        j                  dt        |       d       g }t        |d      D ]  \  }}t        j                  d	| d
t        |       d|d   d d  d       t        ||       }|j                  |       | st        |       t        j                  d|d    d|d    d|d    d       |d   rG|d   D ]?  }t        |j                               }|dk\  rdnd}t        j                  d| d|        A |t        |      k  st        j                  d        t        d       t        d       t        d        t        d! |D              }	t        d" |D              }
t        d# |D              }t        d$t        |              t        d%|	 d|
        t        d&| d
|
 d'|t        |
d      z  d(z  d)d*       | sXt        d+ |D              }t        d, |D              }t        d-| d
t        |              t        d.| d
t        |              y)/Nz	--dry-runz&DRY RUN MODE - no changes will be madeaD  
        SELECT a.id, a.title, a.word_count, a.wp_post_id, a.content_markdown,
               t.target_keywords, t.content_cluster
        FROM parketry_articles a
        JOIN parketry_content_topics t ON a.topic_id = t.id
        WHERE a.wp_post_id IS NOT NULL AND a.wp_post_id > 0
        ORDER BY a.published_at ASC
    zNo published articles foundr   zFound z" published articles to re-optimizer8   z
[r   z] Processing: r/   r   z...)r,   z	  Links: r?   z -> r@   z (multi-word: rA   )rC   r7   MWSWz    [z]    zG
======================================================================SUMMARYzF======================================================================c              3   &   K   | ]	  }|d      yw)r?   N r;   rs     r   r=   zmain.<locals>.<genexpr>        4qAkN4   c              3   &   K   | ]	  }|d      yw)r@   Nrc   rd   s     r   r=   zmain.<locals>.<genexpr>   rf   rg   c              3   &   K   | ]	  }|d      yw)rA   Nrc   rd   s     r   r=   zmain.<locals>.<genexpr>   s     7!ao7rg   zArticles processed: zTotal links: zMulti-word anchors: z (d   z.0fz%)c              3   ,   K   | ]  }|d    s	d  yw)rD   r8   Nrc   rd   s     r   r=   zmain.<locals>.<genexpr>        :!!L/A:   
c              3   ,   K   | ]  }|d    s	d  yw)rE   r8   Nrc   rd   s     r   r=   zmain.<locals>.<genexpr>   rl   rm   zDB updated: zWP updated: )sysargvr$   infor   r9   	enumeraterW   appendr[   r:   timesleepprintrH   max)r,   rowsresultsir+   rT   r<   wcmarker	total_old	total_newtotal_multidb_okwp_oks                 r   mainr      s~   SXX%G<=   	D 12
KK&T#EFGGa( 
7c!Ac$i[ww7G7L6MSQR#GW=v -g6{+,D1D0E F"<014	
 - M* 3^!#qdeF82aS123 s4y=JJqM-2 
/	)	(O4G44I4G44I7w77K	 W
/0	M)D
45	 QykKIVWHX<XY\<\]`;aac
de:w:::w::UG1S\N34UG1S\N34r   __main__)F)%__doc__loggingrero   rt   pathlibr   pathinsertstr__file__parentr   config.settingsr   database.connectionr   r   modulesr   r   basicConfigINFO	getLogger__name__r$   compiler   r   intboolr*   dictrW   r[   r   exitrc   r   r   <module>r      s    	 
   3tH~,,334 5  ( 9 :   
,,A 
		8	$ #

#DE 63 63 6
s # $ &D Dt D DN_t _ _">B zCHHTV r   