
    i-                        d Z ddlZddlZddlZddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ ej                  j                  d e e	e      j                                ddlmZ ddlmZmZmZmZ dd	lmZmZmZmZmZmZmZm Z   ejB                  e"      Z#e G d
 d             Z$dde%de$fdZ&d Z'd Z(e"dk(  r ejR                   e(              yy)z
Main pipeline orchestrator for content generation.
Coordinates topic selection, article generation, fact-checking, and publishing.
    N)	dataclass)datetime)Path)Optional)get_settings)ArticleRepositoryPublishedContentRepositoryTopicRepositoryTopicStatus)article_generatorcontent_validatorfact_checkerhtml_converterinternal_link_injectorthumbnail_generatortopic_selectorwordpress_publisherc                       e Zd ZU eed<   dZee   ed<   dZee   ed<   dZ	ee   ed<   dZ
ee   ed<   dZee   ed<   dZee   ed<   dZeed	<   d
 Zy)PipelineResultsuccessNtopic_id
article_id
wp_post_idpost_urlerrorskipped_reasonstatsc                 ,    | j                   i | _         y y )N)r   )selfs    */var/www/html/content-pipeline/pipeline.py__post_init__zPipelineResult.__post_init__3   s    ::DJ     )__name__
__module____qualname__bool__annotations__r   r   intr   r   r   strr   r   r   dictr!    r"   r    r   r   (   sj    M"Hhsm" $J$ $J$"Hhsm"E8C=$(NHSM(E4r"   r   max_retriesreturnc                 n   t               }dt        j                         j                         i}t        j                  d       t        j                         }|s#t        j                  d       t        dd|      S t        j                  d|j                   d|j                   d	       |j                  |d
<   |j                  |d<   t        j                  d       t        j                  |      }|j                  j                  |d<   |j                  j                  dk(  rt        j                  d|j                           t        j"                  |j                  t$        j&                  d|j                           | dkD  r't        j                  d|  d       t)        | dz
        S t        d|j                  d|      S t        j"                  |j                  t$        j*                         t        j                  d       t-        j.                  |      }|j0                  |d<   |j2                  |d<   |j4                  st        j7                  d|j6                          t        j"                  |j                  t$        j8                  d|j6                          t        d|j                  |j6                  |      S t        j                  d|j:                   d       t        j                  d        t=        j>                  |j@                        }|j:                  |d!<   |jB                  |d"<   |jD                  st        j                  d#|j                    d$       t-        j.                  |      }t=        j>                  |j@                        }|jD                  st        j7                  d%|j                           t        j"                  |j                  t$        j8                  d&|j                           t        d|j                  |j                   |      S t        j                  d'       tG        jH                  |j@                  |d()      \  }}}	|	|d*<   |st        j7                  d+|	jK                  d,              t        j"                  |j                  t$        j8                  d-|	jK                  d,              t        d|j                  d.|      S t        j                  d/       tM        jN                  ||jP                  |jR                  d0|j:                  1      \  }}
|
|d2<   t        j                  d3|
        t        j                  d4       tU        jV                  |      }|jX                  |d5<   |j0                  |d6<   |j4                  s"t        j                  d7|j6                          t        j                  d8       t[        j\                  ||jR                  9      }t        j                  d:       t_        j`                  |j                  |j                  |jb                  |||jd                  |j:                  d;tg        jh                  |	      tk        |	jK                  d<g             |j0                  |j0                  z   =      }||d><   |j4                  r t_        jl                  ||jn                         t        j                  d?       t_        jp                  |      }|jr                  r.t        j                  d@       tu        jv                  d;ddAB      }n0tu        jx                  ||j4                  r|jn                  nd0dCD      }|j4                  st        j7                  dE|j6                          t        j"                  |j                  t$        j8                  dF|j6                          t        d|j                  ||j6                  |G      S ddHl=m>}  |       }|j                         }t_        j                  ||j                  |j                  |       tM        j                  ||jP                  |j                  I      }t        j`                  dJ|j                  |jb                  |jd                  ||j                  t        j                         |jR                  K       t        j"                  |j                  t$        j                  |L       |jr                  st        j                  dM       t                t        j                         j                         |dN<   t        j                  dO|j                          t        d;|j                  ||j                  |j                  |P      S )Qa  
    Run the complete content pipeline.

    Steps:
    1. Select highest-priority pending topic
    2. Check semantic deduplication
    3. Generate article with Gemini Flash + web search
    4. Validate content (word count, error patterns)
    5. Fact-check with Gemini Pro (max 2 attempts)
    6. Generate thumbnail
    7. Publish to WordPress
    8. Update database and invalidate caches

    Args:
        max_retries: Maximum topic retries on deduplication skip

    Returns:
        PipelineResult with outcome details
    
started_atzStep 1: Selecting next topiczNo pending topics availableFzNo pending topics)r   r   r   zSelected topic: z (priority: )r   topic_titlez'Step 2: Checking semantic deduplicationdedupe_verdict	duplicatezTopic skipped (duplicate): zDuplicate: )reasonr   zTrying next topic (z retries left)   zAll topics are duplicates)r   r   r   r   zStep 3: Generating articlegeneration_cost_usdgrounding_usedzArticle generation failed: zGeneration failed: )r   r   r   r   zGenerated: z wordszStep 4: Validating content
word_countvalidation_warningszValidation failed: z, retrying generationzValidation failed after retry: zValidation: zStep 5: Fact-checking article   )max_attemptsfact_check_logzFact-check failed: final_verdictzFact-check: zFact-check failedz"Step 5.5: Injecting internal linksN)contenttopic_keywordscontent_clusterexclude_wp_idr8   internal_links_injectedzInternal links: zStep 6: Generating thumbnailthumbnail_providerthumbnail_cost_usdzThumbnail generation failed: z#Step 7: Converting to HTML with CTA)r@   z"Step 8: Saving article to databaseTattempts)r   titleslugcontent_markdowncontent_htmlmeta_descriptionr8   fact_check_passedr<   fact_check_attemptsr6   r   zStep 9: Publishing to WordPressz#DRY RUN: Skipping WordPress publishz	[DRY RUN])r   r   r   blog)thumbnail_pathcategory_slugzWordPress publish failed: z	Publish: )r   r   r   r   r   )WordPressClient)rF   pipeline)sourcerF   rG   summarymain_keywordsr   published_atr@   )r   z&Step 13: Invalidating WordPress cachescompleted_atzPipeline complete: )r   r   r   r   r   r   )Hr   r   now	isoformatloggerinfor   get_next_topicr   rF   priority_scoreidsemantic_dedupe_checkverdictvaluewarningr4   mark_statusr   SKIPPEDrun_pipelineIN_PROGRESSr   generate_articlecost_usdr7   r   r   FAILEDr8   r   validaterH   warningsvalidr   fact_check_with_retrygetr   inject_internal_linkstarget_keywordsr@   r   generate_thumbnailproviderr   convert_with_ctar   insertrG   rJ   jsondumpslenupdate_thumbnail	file_path	get_by_iddry_runr   PublishResultpublish_articlemodules.wordpress_publisherrP   get_next_author_idupdate_wordpress_idsr   wp_media_idextract_linkable_keywordsr	   	PUBLISHED_invalidate_wp_cachesr   )r,   settingsr   topicdedupe_result
gen_result
validationfact_passedfinal_contentfact_log
link_countthumbnail_resultrI   r   articlepublish_resultrP   	wp_client	author_idenhanced_keywordss                       r    rd   rd   8   s'   ( ~H8<<>3356E KK./))+E12.
 	
 KK"5;;-|E<P<P;QQRSTE* ;;E- KK9:"88?M+3399E
""k14]5I5I4JKL""HH !5!5 67	
 ?KK-k].IJa00XX6	
 	
 uxx)@)@A KK,-"33E:J#-#6#6E
 (77E
2:3C3C2DEF""HHk((3FzGWGWFX1Y	
 XX""	
 	
 KK+j334F;< KK,-"++J,G,GHJ$//E,#-#6#6E
 ,Z->->,??TUV '77>
&//
0K0KL
LL::;L;L:MNO&&+,,|JDUDUCV5W " ''	  KK/0+7+M+M##,(K
 'E
*8<<+H*IJK""HH!(,,"?!@A	

 XX%	
 	
 KK45 6 L L,,--((!M: (2E
#$
KK":,/0 KK./*==eD"2";";E
"2";";E
##67G7M7M6NOP KK56!22=RWRgRghL KK45"))__&!#44((zz(+Z <=&//2B2K2KKJ %E,**:7G7Q7QR KK12))*5G9:,::Q
 -<<9I9Q9Q+55W[ 
 !!1.2F2F1GHI""HHk((9^=Q=Q<R1S	
 XX! &&
 	
 <!I,,.I**!!""	 /HHu,,J4D4D %%__++'!,,\\^--	 uxx)>)>:V <=$LLN446E.
KK%n&=&=%>?@!,,(( r"   c                     	 t        j                  g dddd       t        j                  d       y# t         j                  $ r t        j                  d       Y yt         j                  $ r:} t        j                  d| j                  j                                 Y d} ~ yd} ~ wt        $ r t        j                  d	       Y yw xY w)
z%Trigger WordPress cache invalidation.)dockerexecparketry_wpphpzF/var/www/project/dev/cronjobs/posts/cache_post_data_high_frequency.php<   T)timeoutcheckcapture_outputzWordPress caches invalidatedzCache invalidation timed outzCache invalidation failed: Nz+Docker not available for cache invalidation)

subprocessrunrY   rZ   TimeoutExpiredra   CalledProcessErrorstderrdecodeFileNotFoundError)es    r    r   r   <  s    F 	
 	23$$ 756(( J4QXX__5F4GHII FDEFs!   03 (CC/0B$$!CCc                      t        j                  t         j                  d       t               } | j                  r#t
        j                  d| j                          yt
        j                  d| j                  xs | j                          y)zMain entry point.z4%(asctime)s - %(name)s - %(levelname)s - %(message)s)levelformatzSUCCESS: Published article to r   zFAILED: r5   )
loggingbasicConfigINFOrd   r   rY   rZ   r   r   r   )results    r    mainr   T  sl    llE
 ^F~~4V__4EFGx E0E0EFGHr"   __main__)   )*__doc__rt   r   r   sysdataclassesr   r   pathlibr   typingr   pathrs   r)   __file__parentconfig.settingsr   database.modelsr   r	   r
   r   modulesr   r   r   r   r   r   r   r   	getLoggerr#   rY   r   r(   rd   r   r   exitr+   r"   r    <module>r      s   
    
 !    3tH~,,- . ( 	 	 	 
		8	$   Ac A. AHF0" zCHHTV r"   